/* xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 4e879ede91dffd7f752e2ff7506bc58e6af186e3) */
1c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2c5d9258eSSatish Balay 
3b51a4376SLisandro Dalcin #include <petsc/private/hashseti.h>
4c6db04a5SJed Brown #include <petscblaslapack.h>
565a92638SMatthew G. Knepley #include <petscsf.h>
679bdfe76SSatish Balay 
#if defined(PETSC_HAVE_HYPRE)
/* Prototype only; the definition is not in this section of the file. */
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif
107ea3e4caSstefano_zampini 
11985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
127843d17aSBarry Smith {
137843d17aSBarry Smith   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
14dfbe8321SBarry Smith   PetscErrorCode    ierr;
15*4e879edeSHong Zhang   PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
16*4e879edeSHong Zhang   PetscScalar       *va,*vv;
17*4e879edeSHong Zhang   Vec               vB,vA;
18*4e879edeSHong Zhang   const PetscScalar *vb;
197843d17aSBarry Smith 
207843d17aSBarry Smith   PetscFunctionBegin;
21*4e879edeSHong Zhang   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
22*4e879edeSHong Zhang   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
23*4e879edeSHong Zhang 
24*4e879edeSHong Zhang   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
25985db425SBarry Smith   if (idx) {
26*4e879edeSHong Zhang     for (i=0; i<m; i++) {
2726fbe8dcSKarl Rupp       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2826fbe8dcSKarl Rupp     }
29985db425SBarry Smith   }
307843d17aSBarry Smith 
31*4e879edeSHong Zhang   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
32*4e879edeSHong Zhang   if (idx) {ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);}
33*4e879edeSHong Zhang   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
347843d17aSBarry Smith 
35*4e879edeSHong Zhang   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
36*4e879edeSHong Zhang   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
37*4e879edeSHong Zhang   for (i=0; i<m; i++) {
3826fbe8dcSKarl Rupp     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
39*4e879edeSHong Zhang       vv[i] = vb[i];
40*4e879edeSHong Zhang       if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
41*4e879edeSHong Zhang     } else {
42*4e879edeSHong Zhang       vv[i] = va[i];
43*4e879edeSHong Zhang       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
44*4e879edeSHong Zhang         idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
4526fbe8dcSKarl Rupp     }
467843d17aSBarry Smith   }
47*4e879edeSHong Zhang   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
48*4e879edeSHong Zhang   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
49*4e879edeSHong Zhang   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
50c31cb41cSBarry Smith   ierr = PetscFree(idxb);CHKERRQ(ierr);
51*4e879edeSHong Zhang   ierr = VecDestroy(&vA);CHKERRQ(ierr);
52*4e879edeSHong Zhang   ierr = VecDestroy(&vB);CHKERRQ(ierr);
537843d17aSBarry Smith   PetscFunctionReturn(0);
547843d17aSBarry Smith }
557843d17aSBarry Smith 
567087cfbeSBarry Smith PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
577fc3c18eSBarry Smith {
587fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
59dfbe8321SBarry Smith   PetscErrorCode ierr;
607fc3c18eSBarry Smith 
617fc3c18eSBarry Smith   PetscFunctionBegin;
627fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
637fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
647fc3c18eSBarry Smith   PetscFunctionReturn(0);
657fc3c18eSBarry Smith }
667fc3c18eSBarry Smith 
677087cfbeSBarry Smith PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
687fc3c18eSBarry Smith {
697fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
70dfbe8321SBarry Smith   PetscErrorCode ierr;
717fc3c18eSBarry Smith 
727fc3c18eSBarry Smith   PetscFunctionBegin;
737fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
747fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
757fc3c18eSBarry Smith   PetscFunctionReturn(0);
767fc3c18eSBarry Smith }
777fc3c18eSBarry Smith 
78537820f0SBarry Smith /*
79537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
8057b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
81e06f6af7SJed Brown    storage of the matrix.  This is done in a non scalable way since the
8257b952d6SSatish Balay    length of colmap equals the global matrix length.
8357b952d6SSatish Balay */
84ab9863d7SBarry Smith PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
8557b952d6SSatish Balay {
8657b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
8757b952d6SSatish Balay   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
886849ba73SBarry Smith   PetscErrorCode ierr;
89d0f46423SBarry Smith   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
9057b952d6SSatish Balay 
91d64ed03dSBarry Smith   PetscFunctionBegin;
92aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
93e23dfa41SBarry Smith   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
9448e59246SSatish Balay   for (i=0; i<nbs; i++) {
953861aac3SJed Brown     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
9648e59246SSatish Balay   }
9748e59246SSatish Balay #else
98580bdb30SBarry Smith   ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
993bb1ff40SBarry Smith   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
100928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
10148e59246SSatish Balay #endif
1023a40ed3dSBarry Smith   PetscFunctionReturn(0);
10357b952d6SSatish Balay }
10457b952d6SSatish Balay 
/*
   MatSetValues_SeqBAIJ_A_Private - Inserts or adds one scalar into the sequential
   BAIJ matrix A at point position (row,col); orow/ocol are used only in the error message.

   This is a statement macro, not a function.  It reads and writes the following
   variables of the enclosing function, which must all be declared there:
     A, a, aa, ai, aj, aimax, ailen        - the matrix and its storage arrays
     bs, bs2                               - block size and entries per block
     brow, bcol, ridx, cidx                - block row/column and position inside the block
     rp, ap, bap, rmax, nrow, low, high, t, _i, N, ierr  - work variables
   MatSeqXAIJReallocateAIJ() is defined elsewhere and may rely on further locals of
   the enclosing function.  The macro defines the label a_noinsert, so it can be
   expanded only once per function.

   The value is stored at offset bs*cidx + ridx inside its bs2-sized block.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    /* bisection narrows the search window to at most 3 block columns */ \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    /* linear scan of the window; _i ends at the insertion position if bcol is absent */ \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    /* the new block starts out as all zeros, then receives the single value */ \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
14057b952d6SSatish Balay 
/*
   MatSetValues_SeqBAIJ_B_Private - Inserts or adds one scalar into the sequential
   BAIJ matrix B at point position (row,col); orow/ocol are used only in the error message.

   This is a statement macro, not a function.  It reads and writes the following
   variables of the enclosing function, which must all be declared there:
     B, b, ba, bi, bj, bimax, bilen        - the matrix and its storage arrays
     bs, bs2                               - block size and entries per block
     brow, bcol, ridx, cidx                - block row/column and position inside the block
     rp, ap, bap, rmax, nrow, low, high, t, _i, N, ierr  - work variables
   MatSeqXAIJReallocateAIJ() is defined elsewhere and may rely on further locals of
   the enclosing function.  The macro defines the label b_noinsert, so it can be
   expanded only once per function.

   The value is stored at offset bs*cidx + ridx inside its bs2-sized block.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    /* bisection narrows the search window to at most 3 block columns */ \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    /* linear scan of the window; _i ends at the insertion position if bcol is absent */ \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
    /* the new block starts out as all zeros, then receives the single value */ \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
176ac7a638eSSatish Balay 
177b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
17857b952d6SSatish Balay {
17957b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
18093fea6afSBarry Smith   MatScalar      value;
181ace3abfcSBarry Smith   PetscBool      roworiented = baij->roworiented;
182dfbe8321SBarry Smith   PetscErrorCode ierr;
183b24ad042SBarry Smith   PetscInt       i,j,row,col;
184d0f46423SBarry Smith   PetscInt       rstart_orig=mat->rmap->rstart;
185d0f46423SBarry Smith   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
186d0f46423SBarry Smith   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
18757b952d6SSatish Balay 
188eada6651SSatish Balay   /* Some Variables required in the macro */
18980c1aa95SSatish Balay   Mat         A     = baij->A;
19080c1aa95SSatish Balay   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
191b24ad042SBarry Smith   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
1923eda8832SBarry Smith   MatScalar   *aa   =a->a;
193ac7a638eSSatish Balay 
194ac7a638eSSatish Balay   Mat         B     = baij->B;
195ac7a638eSSatish Balay   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
196b24ad042SBarry Smith   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
1973eda8832SBarry Smith   MatScalar   *ba   =b->a;
198ac7a638eSSatish Balay 
199b24ad042SBarry Smith   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
200b24ad042SBarry Smith   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
2013eda8832SBarry Smith   MatScalar *ap,*bap;
20280c1aa95SSatish Balay 
203d64ed03dSBarry Smith   PetscFunctionBegin;
20457b952d6SSatish Balay   for (i=0; i<m; i++) {
2055ef9f2a5SBarry Smith     if (im[i] < 0) continue;
206cf9c20a2SJed Brown     if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
20757b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
20857b952d6SSatish Balay       row = im[i] - rstart_orig;
20957b952d6SSatish Balay       for (j=0; j<n; j++) {
21057b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig) {
21157b952d6SSatish Balay           col = in[j] - cstart_orig;
212db4deed7SKarl Rupp           if (roworiented) value = v[i*n+j];
213db4deed7SKarl Rupp           else             value = v[i+j*m];
214d40312a9SBarry Smith           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
21573959e64SBarry Smith         } else if (in[j] < 0) continue;
2169245e749SBarry Smith         else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
2179245e749SBarry Smith         else {
21857b952d6SSatish Balay           if (mat->was_assembled) {
219905e6a2fSBarry Smith             if (!baij->colmap) {
220ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
221905e6a2fSBarry Smith             }
222aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
2230f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
224bba1ac68SSatish Balay             col  = col - 1;
22548e59246SSatish Balay #else
226bba1ac68SSatish Balay             col = baij->colmap[in[j]/bs] - 1;
22748e59246SSatish Balay #endif
228c9ef50b2SBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
229ab9863d7SBarry Smith               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
2308295de27SSatish Balay               col  =  in[j];
2319bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2329bf004c3SSatish Balay               B    = baij->B;
2339bf004c3SSatish Balay               b    = (Mat_SeqBAIJ*)(B)->data;
2349bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2359bf004c3SSatish Balay               ba   =b->a;
236c9ef50b2SBarry Smith             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
237c9ef50b2SBarry Smith             else col += in[j]%bs;
2388295de27SSatish Balay           } else col = in[j];
239db4deed7SKarl Rupp           if (roworiented) value = v[i*n+j];
240db4deed7SKarl Rupp           else             value = v[i+j*m];
241d40312a9SBarry Smith           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
24290da58bdSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
24357b952d6SSatish Balay         }
24457b952d6SSatish Balay       }
245d64ed03dSBarry Smith     } else {
2464cb17eb5SBarry Smith       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
24790f02eecSBarry Smith       if (!baij->donotstash) {
2485080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
249ff2fd236SBarry Smith         if (roworiented) {
250b400d20cSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
251ff2fd236SBarry Smith         } else {
252b400d20cSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
25357b952d6SSatish Balay         }
25457b952d6SSatish Balay       }
25557b952d6SSatish Balay     }
25690f02eecSBarry Smith   }
2573a40ed3dSBarry Smith   PetscFunctionReturn(0);
25857b952d6SSatish Balay }
25957b952d6SSatish Balay 
2608ab52850SBarry Smith PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
261880c6e6aSBarry Smith {
262880c6e6aSBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
2638ab52850SBarry Smith   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
264880c6e6aSBarry Smith   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
265880c6e6aSBarry Smith   PetscErrorCode    ierr;
2668ab52850SBarry Smith   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
267880c6e6aSBarry Smith   PetscBool         roworiented=a->roworiented;
268880c6e6aSBarry Smith   const PetscScalar *value     = v;
269880c6e6aSBarry Smith   MatScalar         *ap,*aa = a->a,*bap;
270880c6e6aSBarry Smith 
271880c6e6aSBarry Smith   PetscFunctionBegin;
272880c6e6aSBarry Smith   rp   = aj + ai[row];
273880c6e6aSBarry Smith   ap   = aa + bs2*ai[row];
274880c6e6aSBarry Smith   rmax = imax[row];
275880c6e6aSBarry Smith   nrow = ailen[row];
2768ab52850SBarry Smith   value = v;
2778ab52850SBarry Smith   low = 0;
2788ab52850SBarry Smith   high = nrow;
279880c6e6aSBarry Smith   while (high-low > 7) {
280880c6e6aSBarry Smith     t = (low+high)/2;
281880c6e6aSBarry Smith     if (rp[t] > col) high = t;
282880c6e6aSBarry Smith     else             low  = t;
283880c6e6aSBarry Smith   }
284880c6e6aSBarry Smith   for (i=low; i<high; i++) {
285880c6e6aSBarry Smith     if (rp[i] > col) break;
286880c6e6aSBarry Smith     if (rp[i] == col) {
287880c6e6aSBarry Smith       bap = ap +  bs2*i;
288880c6e6aSBarry Smith       if (roworiented) {
289880c6e6aSBarry Smith         if (is == ADD_VALUES) {
2908ab52850SBarry Smith           for (ii=0; ii<bs; ii++) {
291880c6e6aSBarry Smith             for (jj=ii; jj<bs2; jj+=bs) {
292880c6e6aSBarry Smith               bap[jj] += *value++;
293880c6e6aSBarry Smith             }
294880c6e6aSBarry Smith           }
295880c6e6aSBarry Smith         } else {
2968ab52850SBarry Smith           for (ii=0; ii<bs; ii++) {
297880c6e6aSBarry Smith             for (jj=ii; jj<bs2; jj+=bs) {
298880c6e6aSBarry Smith               bap[jj] = *value++;
299880c6e6aSBarry Smith             }
300880c6e6aSBarry Smith           }
301880c6e6aSBarry Smith         }
302880c6e6aSBarry Smith       } else {
303880c6e6aSBarry Smith         if (is == ADD_VALUES) {
3048ab52850SBarry Smith           for (ii=0; ii<bs; ii++,value+=bs) {
305880c6e6aSBarry Smith             for (jj=0; jj<bs; jj++) {
306880c6e6aSBarry Smith               bap[jj] += value[jj];
307880c6e6aSBarry Smith             }
308880c6e6aSBarry Smith             bap += bs;
309880c6e6aSBarry Smith           }
310880c6e6aSBarry Smith         } else {
3118ab52850SBarry Smith           for (ii=0; ii<bs; ii++,value+=bs) {
312880c6e6aSBarry Smith             for (jj=0; jj<bs; jj++) {
313880c6e6aSBarry Smith               bap[jj]  = value[jj];
314880c6e6aSBarry Smith             }
315880c6e6aSBarry Smith             bap += bs;
316880c6e6aSBarry Smith           }
317880c6e6aSBarry Smith         }
318880c6e6aSBarry Smith       }
319880c6e6aSBarry Smith       goto noinsert2;
320880c6e6aSBarry Smith     }
321880c6e6aSBarry Smith   }
322880c6e6aSBarry Smith   if (nonew == 1) goto noinsert2;
323880c6e6aSBarry Smith   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
324880c6e6aSBarry Smith   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
325880c6e6aSBarry Smith   N = nrow++ - 1; high++;
326880c6e6aSBarry Smith   /* shift up all the later entries in this row */
327580bdb30SBarry Smith   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
328580bdb30SBarry Smith   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
329880c6e6aSBarry Smith   rp[i] = col;
330880c6e6aSBarry Smith   bap   = ap +  bs2*i;
331880c6e6aSBarry Smith   if (roworiented) {
3328ab52850SBarry Smith     for (ii=0; ii<bs; ii++) {
333880c6e6aSBarry Smith       for (jj=ii; jj<bs2; jj+=bs) {
334880c6e6aSBarry Smith         bap[jj] = *value++;
335880c6e6aSBarry Smith       }
336880c6e6aSBarry Smith     }
337880c6e6aSBarry Smith   } else {
3388ab52850SBarry Smith     for (ii=0; ii<bs; ii++) {
339880c6e6aSBarry Smith       for (jj=0; jj<bs; jj++) {
340880c6e6aSBarry Smith         *bap++ = *value++;
341880c6e6aSBarry Smith       }
342880c6e6aSBarry Smith     }
343880c6e6aSBarry Smith   }
344880c6e6aSBarry Smith   noinsert2:;
345880c6e6aSBarry Smith   ailen[row] = nrow;
346880c6e6aSBarry Smith   PetscFunctionReturn(0);
347880c6e6aSBarry Smith }
348880c6e6aSBarry Smith 
3498ab52850SBarry Smith /*
3508ab52850SBarry Smith     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
3518ab52850SBarry Smith     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
3528ab52850SBarry Smith */
35397e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
354ab26458aSBarry Smith {
355ab26458aSBarry Smith   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
356dd6ea824SBarry Smith   const PetscScalar *value;
357f15d580aSBarry Smith   MatScalar         *barray     = baij->barray;
358ace3abfcSBarry Smith   PetscBool         roworiented = baij->roworiented;
359dfbe8321SBarry Smith   PetscErrorCode    ierr;
360899cda47SBarry Smith   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
361899cda47SBarry Smith   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
362d0f46423SBarry Smith   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
363ab26458aSBarry Smith 
364b16ae2b1SBarry Smith   PetscFunctionBegin;
36530793edcSSatish Balay   if (!barray) {
366785e854fSJed Brown     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
36782502324SSatish Balay     baij->barray = barray;
36830793edcSSatish Balay   }
36930793edcSSatish Balay 
37026fbe8dcSKarl Rupp   if (roworiented) stepval = (n-1)*bs;
37126fbe8dcSKarl Rupp   else stepval = (m-1)*bs;
37226fbe8dcSKarl Rupp 
373ab26458aSBarry Smith   for (i=0; i<m; i++) {
3745ef9f2a5SBarry Smith     if (im[i] < 0) continue;
375cf9c20a2SJed Brown     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
376ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
377ab26458aSBarry Smith       row = im[i] - rstart;
378ab26458aSBarry Smith       for (j=0; j<n; j++) {
37915b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
38015b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
381f15d580aSBarry Smith           barray = (MatScalar*)v + i*bs2;
38215b57d14SSatish Balay         } else if ((!roworiented) && (m == 1)) {
383f15d580aSBarry Smith           barray = (MatScalar*)v + j*bs2;
38415b57d14SSatish Balay         } else { /* Here a copy is required */
385ab26458aSBarry Smith           if (roworiented) {
38653ef36baSBarry Smith             value = v + (i*(stepval+bs) + j)*bs;
387ab26458aSBarry Smith           } else {
38853ef36baSBarry Smith             value = v + (j*(stepval+bs) + i)*bs;
389abef11f7SSatish Balay           }
39053ef36baSBarry Smith           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
39126fbe8dcSKarl Rupp             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
39253ef36baSBarry Smith             barray += bs;
39347513183SBarry Smith           }
39430793edcSSatish Balay           barray -= bs2;
39515b57d14SSatish Balay         }
396abef11f7SSatish Balay 
397abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend) {
398abef11f7SSatish Balay           col  = in[j] - cstart;
3998ab52850SBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
40026fbe8dcSKarl Rupp         } else if (in[j] < 0) continue;
4019245e749SBarry Smith         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
4029245e749SBarry Smith         else {
403ab26458aSBarry Smith           if (mat->was_assembled) {
404ab26458aSBarry Smith             if (!baij->colmap) {
405ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
406ab26458aSBarry Smith             }
407a5eb4965SSatish Balay 
4082515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
409aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
410b24ad042SBarry Smith             { PetscInt data;
4110f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
412e32f2f54SBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
413fa46199cSSatish Balay             }
41448e59246SSatish Balay #else
415e32f2f54SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
416a5eb4965SSatish Balay #endif
41748e59246SSatish Balay #endif
418aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
4190f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
420fa46199cSSatish Balay             col  = (col - 1)/bs;
42148e59246SSatish Balay #else
422a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
42348e59246SSatish Balay #endif
4240e9bae81SBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
425ab9863d7SBarry Smith               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
426ab26458aSBarry Smith               col  =  in[j];
427bb003d0fSBarry Smith             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
428db4deed7SKarl Rupp           } else col = in[j];
4298ab52850SBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
430ab26458aSBarry Smith         }
431ab26458aSBarry Smith       }
432d64ed03dSBarry Smith     } else {
433bb003d0fSBarry Smith       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
434ab26458aSBarry Smith       if (!baij->donotstash) {
435ff2fd236SBarry Smith         if (roworiented) {
4366fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
437ff2fd236SBarry Smith         } else {
4386fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
439ff2fd236SBarry Smith         }
440abef11f7SSatish Balay       }
441ab26458aSBarry Smith     }
442ab26458aSBarry Smith   }
4433a40ed3dSBarry Smith   PetscFunctionReturn(0);
444ab26458aSBarry Smith }
4456fa18ffdSBarry Smith 
4460bdbc534SSatish Balay #define HASH_KEY 0.6180339887
447b24ad042SBarry Smith #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
448b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
449b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
45097e5c40aSBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
4510bdbc534SSatish Balay {
4520bdbc534SSatish Balay   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
453ace3abfcSBarry Smith   PetscBool      roworiented = baij->roworiented;
454dfbe8321SBarry Smith   PetscErrorCode ierr;
455b24ad042SBarry Smith   PetscInt       i,j,row,col;
456d0f46423SBarry Smith   PetscInt       rstart_orig=mat->rmap->rstart;
457d0f46423SBarry Smith   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
458d0f46423SBarry Smith   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
459329f5518SBarry Smith   PetscReal      tmp;
4603eda8832SBarry Smith   MatScalar      **HD = baij->hd,value;
461b24ad042SBarry Smith   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4620bdbc534SSatish Balay 
4630bdbc534SSatish Balay   PetscFunctionBegin;
4640bdbc534SSatish Balay   for (i=0; i<m; i++) {
46576bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) {
466e32f2f54SBarry Smith       if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
467e32f2f54SBarry Smith       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
46876bd3646SJed Brown     }
4690bdbc534SSatish Balay     row = im[i];
470c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
4710bdbc534SSatish Balay       for (j=0; j<n; j++) {
4720bdbc534SSatish Balay         col = in[j];
473db4deed7SKarl Rupp         if (roworiented) value = v[i*n+j];
474db4deed7SKarl Rupp         else             value = v[i+j*m];
475b24ad042SBarry Smith         /* Look up PetscInto the Hash Table */
476c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
477c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4780bdbc534SSatish Balay 
479c2760754SSatish Balay 
480c2760754SSatish Balay         idx = h1;
48176bd3646SJed Brown         if (PetscDefined(USE_DEBUG)) {
482187ce0cbSSatish Balay           insert_ct++;
483187ce0cbSSatish Balay           total_ct++;
484187ce0cbSSatish Balay           if (HT[idx] != key) {
485187ce0cbSSatish Balay             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
486187ce0cbSSatish Balay             if (idx == size) {
487187ce0cbSSatish Balay               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
488f23aa3ddSBarry Smith               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
489187ce0cbSSatish Balay             }
490187ce0cbSSatish Balay           }
49176bd3646SJed Brown         } else if (HT[idx] != key) {
492c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
493c2760754SSatish Balay           if (idx == size) {
494c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
495f23aa3ddSBarry Smith             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
496c2760754SSatish Balay           }
497c2760754SSatish Balay         }
498c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
499c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
500c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
5010bdbc534SSatish Balay       }
50226fbe8dcSKarl Rupp     } else if (!baij->donotstash) {
503ff2fd236SBarry Smith       if (roworiented) {
504b400d20cSBarry Smith         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
505ff2fd236SBarry Smith       } else {
506b400d20cSBarry Smith         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
5070bdbc534SSatish Balay       }
5080bdbc534SSatish Balay     }
5090bdbc534SSatish Balay   }
51076bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
511abf3b562SBarry Smith     baij->ht_total_ct  += total_ct;
512abf3b562SBarry Smith     baij->ht_insert_ct += insert_ct;
51376bd3646SJed Brown   }
5140bdbc534SSatish Balay   PetscFunctionReturn(0);
5150bdbc534SSatish Balay }
5160bdbc534SSatish Balay 
51797e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
5180bdbc534SSatish Balay {
5190bdbc534SSatish Balay   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
520ace3abfcSBarry Smith   PetscBool         roworiented = baij->roworiented;
521dfbe8321SBarry Smith   PetscErrorCode    ierr;
522b24ad042SBarry Smith   PetscInt          i,j,ii,jj,row,col;
523899cda47SBarry Smith   PetscInt          rstart=baij->rstartbs;
524d0f46423SBarry Smith   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
525b24ad042SBarry Smith   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
526329f5518SBarry Smith   PetscReal         tmp;
5273eda8832SBarry Smith   MatScalar         **HD = baij->hd,*baij_a;
528dd6ea824SBarry Smith   const PetscScalar *v_t,*value;
529b24ad042SBarry Smith   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5300bdbc534SSatish Balay 
531d0a41580SSatish Balay   PetscFunctionBegin;
53226fbe8dcSKarl Rupp   if (roworiented) stepval = (n-1)*bs;
53326fbe8dcSKarl Rupp   else stepval = (m-1)*bs;
53426fbe8dcSKarl Rupp 
5350bdbc534SSatish Balay   for (i=0; i<m; i++) {
53676bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) {
537e32f2f54SBarry Smith       if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
538e32f2f54SBarry Smith       if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
53976bd3646SJed Brown     }
5400bdbc534SSatish Balay     row = im[i];
541ab715e2cSSatish Balay     v_t = v + i*nbs2;
542c2760754SSatish Balay     if (row >= rstart && row < rend) {
5430bdbc534SSatish Balay       for (j=0; j<n; j++) {
5440bdbc534SSatish Balay         col = in[j];
5450bdbc534SSatish Balay 
5460bdbc534SSatish Balay         /* Look up into the Hash Table */
547c2760754SSatish Balay         key = row*Nbs+col+1;
548c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5490bdbc534SSatish Balay 
550c2760754SSatish Balay         idx = h1;
55176bd3646SJed Brown         if (PetscDefined(USE_DEBUG)) {
552187ce0cbSSatish Balay           total_ct++;
553187ce0cbSSatish Balay           insert_ct++;
554187ce0cbSSatish Balay           if (HT[idx] != key) {
555187ce0cbSSatish Balay             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
556187ce0cbSSatish Balay             if (idx == size) {
557187ce0cbSSatish Balay               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
558f23aa3ddSBarry Smith               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
559187ce0cbSSatish Balay             }
560187ce0cbSSatish Balay           }
56176bd3646SJed Brown         } else if (HT[idx] != key) {
562c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
563c2760754SSatish Balay           if (idx == size) {
564c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
565f23aa3ddSBarry Smith             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
566c2760754SSatish Balay           }
567c2760754SSatish Balay         }
568c2760754SSatish Balay         baij_a = HD[idx];
5690bdbc534SSatish Balay         if (roworiented) {
570c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
571187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
572187ce0cbSSatish Balay           value = v_t;
573187ce0cbSSatish Balay           v_t  += bs;
574fef45726SSatish Balay           if (addv == ADD_VALUES) {
575c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
576c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
577fef45726SSatish Balay                 baij_a[jj] += *value++;
578b4cc0f5aSSatish Balay               }
579b4cc0f5aSSatish Balay             }
580fef45726SSatish Balay           } else {
581c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
582c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
583fef45726SSatish Balay                 baij_a[jj] = *value++;
584fef45726SSatish Balay               }
585fef45726SSatish Balay             }
586fef45726SSatish Balay           }
5870bdbc534SSatish Balay         } else {
5880bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
589fef45726SSatish Balay           if (addv == ADD_VALUES) {
590b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
5910bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
592fef45726SSatish Balay                 baij_a[jj] += *value++;
593fef45726SSatish Balay               }
594fef45726SSatish Balay             }
595fef45726SSatish Balay           } else {
596fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
597fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
598fef45726SSatish Balay                 baij_a[jj] = *value++;
599fef45726SSatish Balay               }
600b4cc0f5aSSatish Balay             }
6010bdbc534SSatish Balay           }
6020bdbc534SSatish Balay         }
6030bdbc534SSatish Balay       }
6040bdbc534SSatish Balay     } else {
6050bdbc534SSatish Balay       if (!baij->donotstash) {
6060bdbc534SSatish Balay         if (roworiented) {
6078798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6080bdbc534SSatish Balay         } else {
6098798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6100bdbc534SSatish Balay         }
6110bdbc534SSatish Balay       }
6120bdbc534SSatish Balay     }
6130bdbc534SSatish Balay   }
61476bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
615abf3b562SBarry Smith     baij->ht_total_ct  += total_ct;
616abf3b562SBarry Smith     baij->ht_insert_ct += insert_ct;
61776bd3646SJed Brown   }
6180bdbc534SSatish Balay   PetscFunctionReturn(0);
6190bdbc534SSatish Balay }
620133cdb44SSatish Balay 
621b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
622d6de1c52SSatish Balay {
623d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
6246849ba73SBarry Smith   PetscErrorCode ierr;
625d0f46423SBarry Smith   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
626d0f46423SBarry Smith   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
627d6de1c52SSatish Balay 
628133cdb44SSatish Balay   PetscFunctionBegin;
629d6de1c52SSatish Balay   for (i=0; i<m; i++) {
630e32f2f54SBarry Smith     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
631e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
632d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
633d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
634d6de1c52SSatish Balay       for (j=0; j<n; j++) {
635e32f2f54SBarry Smith         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
636e32f2f54SBarry Smith         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
637d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend) {
638d6de1c52SSatish Balay           col  = idxn[j] - bscstart;
63998dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
640d64ed03dSBarry Smith         } else {
641905e6a2fSBarry Smith           if (!baij->colmap) {
642ab9863d7SBarry Smith             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
643905e6a2fSBarry Smith           }
644aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
6450f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
646fa46199cSSatish Balay           data--;
64748e59246SSatish Balay #else
64848e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
64948e59246SSatish Balay #endif
65048e59246SSatish Balay           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
651d9d09a02SSatish Balay           else {
65248e59246SSatish Balay             col  = data + idxn[j]%bs;
65398dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
654d6de1c52SSatish Balay           }
655d6de1c52SSatish Balay         }
656d6de1c52SSatish Balay       }
657f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
658d6de1c52SSatish Balay   }
6593a40ed3dSBarry Smith   PetscFunctionReturn(0);
660d6de1c52SSatish Balay }
661d6de1c52SSatish Balay 
662dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
663d6de1c52SSatish Balay {
664d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
665d6de1c52SSatish Balay   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
666dfbe8321SBarry Smith   PetscErrorCode ierr;
667d0f46423SBarry Smith   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
668329f5518SBarry Smith   PetscReal      sum = 0.0;
6693eda8832SBarry Smith   MatScalar      *v;
670d6de1c52SSatish Balay 
671d64ed03dSBarry Smith   PetscFunctionBegin;
672d6de1c52SSatish Balay   if (baij->size == 1) {
673064f8208SBarry Smith     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
674d6de1c52SSatish Balay   } else {
675d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
676d6de1c52SSatish Balay       v  = amat->a;
6778a62d963SHong Zhang       nz = amat->nz*bs2;
6788a62d963SHong Zhang       for (i=0; i<nz; i++) {
679329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
680d6de1c52SSatish Balay       }
681d6de1c52SSatish Balay       v  = bmat->a;
6828a62d963SHong Zhang       nz = bmat->nz*bs2;
6838a62d963SHong Zhang       for (i=0; i<nz; i++) {
684329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
685d6de1c52SSatish Balay       }
686b2566f29SBarry Smith       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
6878f1a2a5eSBarry Smith       *nrm = PetscSqrtReal(*nrm);
6888a62d963SHong Zhang     } else if (type == NORM_1) { /* max column sum */
6898a62d963SHong Zhang       PetscReal *tmp,*tmp2;
690899cda47SBarry Smith       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
6918f8f2f0dSBarry Smith       ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
692857a15f1SBarry Smith       ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
6938a62d963SHong Zhang       v    = amat->a; jj = amat->j;
6948a62d963SHong Zhang       for (i=0; i<amat->nz; i++) {
6958a62d963SHong Zhang         for (j=0; j<bs; j++) {
6968a62d963SHong Zhang           col = bs*(cstart + *jj) + j; /* column index */
6978a62d963SHong Zhang           for (row=0; row<bs; row++) {
6988a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v);  v++;
6998a62d963SHong Zhang           }
7008a62d963SHong Zhang         }
7018a62d963SHong Zhang         jj++;
7028a62d963SHong Zhang       }
7038a62d963SHong Zhang       v = bmat->a; jj = bmat->j;
7048a62d963SHong Zhang       for (i=0; i<bmat->nz; i++) {
7058a62d963SHong Zhang         for (j=0; j<bs; j++) {
7068a62d963SHong Zhang           col = bs*garray[*jj] + j;
7078a62d963SHong Zhang           for (row=0; row<bs; row++) {
7088a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v); v++;
7098a62d963SHong Zhang           }
7108a62d963SHong Zhang         }
7118a62d963SHong Zhang         jj++;
7128a62d963SHong Zhang       }
713b2566f29SBarry Smith       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
7148a62d963SHong Zhang       *nrm = 0.0;
715d0f46423SBarry Smith       for (j=0; j<mat->cmap->N; j++) {
7168a62d963SHong Zhang         if (tmp2[j] > *nrm) *nrm = tmp2[j];
7178a62d963SHong Zhang       }
718857a15f1SBarry Smith       ierr = PetscFree(tmp);CHKERRQ(ierr);
719857a15f1SBarry Smith       ierr = PetscFree(tmp2);CHKERRQ(ierr);
7208a62d963SHong Zhang     } else if (type == NORM_INFINITY) { /* max row sum */
721577dd1f9SKris Buschelman       PetscReal *sums;
722785e854fSJed Brown       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
7238a62d963SHong Zhang       sum  = 0.0;
7248a62d963SHong Zhang       for (j=0; j<amat->mbs; j++) {
7258a62d963SHong Zhang         for (row=0; row<bs; row++) sums[row] = 0.0;
7268a62d963SHong Zhang         v  = amat->a + bs2*amat->i[j];
7278a62d963SHong Zhang         nz = amat->i[j+1]-amat->i[j];
7288a62d963SHong Zhang         for (i=0; i<nz; i++) {
7298a62d963SHong Zhang           for (col=0; col<bs; col++) {
7308a62d963SHong Zhang             for (row=0; row<bs; row++) {
7318a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
7328a62d963SHong Zhang             }
7338a62d963SHong Zhang           }
7348a62d963SHong Zhang         }
7358a62d963SHong Zhang         v  = bmat->a + bs2*bmat->i[j];
7368a62d963SHong Zhang         nz = bmat->i[j+1]-bmat->i[j];
7378a62d963SHong Zhang         for (i=0; i<nz; i++) {
7388a62d963SHong Zhang           for (col=0; col<bs; col++) {
7398a62d963SHong Zhang             for (row=0; row<bs; row++) {
7408a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
7418a62d963SHong Zhang             }
7428a62d963SHong Zhang           }
7438a62d963SHong Zhang         }
7448a62d963SHong Zhang         for (row=0; row<bs; row++) {
7458a62d963SHong Zhang           if (sums[row] > sum) sum = sums[row];
7468a62d963SHong Zhang         }
7478a62d963SHong Zhang       }
748b2566f29SBarry Smith       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
749577dd1f9SKris Buschelman       ierr = PetscFree(sums);CHKERRQ(ierr);
750ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
751d64ed03dSBarry Smith   }
7523a40ed3dSBarry Smith   PetscFunctionReturn(0);
753d6de1c52SSatish Balay }
75457b952d6SSatish Balay 
/*
  Creates the hash table and fills it with the blocks
  currently stored in the matrix.
  The table is created only once.
  If new entries need to be added to the matrix,
  the hash table has to be destroyed and
  recreated.
*/
762dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
763596b8d2eSBarry Smith {
764596b8d2eSBarry Smith   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
765596b8d2eSBarry Smith   Mat            A     = baij->A,B=baij->B;
766596b8d2eSBarry Smith   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
767b24ad042SBarry Smith   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
7686849ba73SBarry Smith   PetscErrorCode ierr;
769fca92195SBarry Smith   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
770899cda47SBarry Smith   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
771b24ad042SBarry Smith   PetscInt       *HT,key;
7723eda8832SBarry Smith   MatScalar      **HD;
773329f5518SBarry Smith   PetscReal      tmp;
7746cf91177SBarry Smith #if defined(PETSC_USE_INFO)
775b24ad042SBarry Smith   PetscInt ct=0,max=0;
7764a15367fSSatish Balay #endif
777fef45726SSatish Balay 
778d64ed03dSBarry Smith   PetscFunctionBegin;
779fca92195SBarry Smith   if (baij->ht) PetscFunctionReturn(0);
780fef45726SSatish Balay 
781fca92195SBarry Smith   baij->ht_size = (PetscInt)(factor*nz);
782fca92195SBarry Smith   ht_size       = baij->ht_size;
7830bdbc534SSatish Balay 
784fef45726SSatish Balay   /* Allocate Memory for Hash Table */
7851795a4d1SJed Brown   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
786b9e4cc15SSatish Balay   HD   = baij->hd;
787a07cd24cSSatish Balay   HT   = baij->ht;
788b9e4cc15SSatish Balay 
789596b8d2eSBarry Smith   /* Loop Over A */
7900bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
791596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
7920bdbc534SSatish Balay       row = i+rstart;
7930bdbc534SSatish Balay       col = aj[j]+cstart;
794596b8d2eSBarry Smith 
795187ce0cbSSatish Balay       key = row*Nbs + col + 1;
796fca92195SBarry Smith       h1  = HASH(ht_size,key,tmp);
797fca92195SBarry Smith       for (k=0; k<ht_size; k++) {
798fca92195SBarry Smith         if (!HT[(h1+k)%ht_size]) {
799fca92195SBarry Smith           HT[(h1+k)%ht_size] = key;
800fca92195SBarry Smith           HD[(h1+k)%ht_size] = a->a + j*bs2;
801596b8d2eSBarry Smith           break;
8026cf91177SBarry Smith #if defined(PETSC_USE_INFO)
803187ce0cbSSatish Balay         } else {
804187ce0cbSSatish Balay           ct++;
805187ce0cbSSatish Balay #endif
806596b8d2eSBarry Smith         }
807187ce0cbSSatish Balay       }
8086cf91177SBarry Smith #if defined(PETSC_USE_INFO)
809187ce0cbSSatish Balay       if (k> max) max = k;
810187ce0cbSSatish Balay #endif
811596b8d2eSBarry Smith     }
812596b8d2eSBarry Smith   }
813596b8d2eSBarry Smith   /* Loop Over B */
8140bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
815596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
8160bdbc534SSatish Balay       row = i+rstart;
8170bdbc534SSatish Balay       col = garray[bj[j]];
818187ce0cbSSatish Balay       key = row*Nbs + col + 1;
819fca92195SBarry Smith       h1  = HASH(ht_size,key,tmp);
820fca92195SBarry Smith       for (k=0; k<ht_size; k++) {
821fca92195SBarry Smith         if (!HT[(h1+k)%ht_size]) {
822fca92195SBarry Smith           HT[(h1+k)%ht_size] = key;
823fca92195SBarry Smith           HD[(h1+k)%ht_size] = b->a + j*bs2;
824596b8d2eSBarry Smith           break;
8256cf91177SBarry Smith #if defined(PETSC_USE_INFO)
826187ce0cbSSatish Balay         } else {
827187ce0cbSSatish Balay           ct++;
828187ce0cbSSatish Balay #endif
829596b8d2eSBarry Smith         }
830187ce0cbSSatish Balay       }
8316cf91177SBarry Smith #if defined(PETSC_USE_INFO)
832187ce0cbSSatish Balay       if (k> max) max = k;
833187ce0cbSSatish Balay #endif
834596b8d2eSBarry Smith     }
835596b8d2eSBarry Smith   }
836596b8d2eSBarry Smith 
837596b8d2eSBarry Smith   /* Print Summary */
8386cf91177SBarry Smith #if defined(PETSC_USE_INFO)
839fca92195SBarry Smith   for (i=0,j=0; i<ht_size; i++) {
84026fbe8dcSKarl Rupp     if (HT[i]) j++;
841c38d4ed2SBarry Smith   }
8421e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
843187ce0cbSSatish Balay #endif
8443a40ed3dSBarry Smith   PetscFunctionReturn(0);
845596b8d2eSBarry Smith }
84657b952d6SSatish Balay 
847dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
848bbb85fb3SSatish Balay {
849bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
850dfbe8321SBarry Smith   PetscErrorCode ierr;
851b24ad042SBarry Smith   PetscInt       nstash,reallocs;
852bbb85fb3SSatish Balay 
853bbb85fb3SSatish Balay   PetscFunctionBegin;
85426fbe8dcSKarl Rupp   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
855bbb85fb3SSatish Balay 
856d0f46423SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
8571e2582c4SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
8588798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
8591e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
86046680499SSatish Balay   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
8611e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
862bbb85fb3SSatish Balay   PetscFunctionReturn(0);
863bbb85fb3SSatish Balay }
864bbb85fb3SSatish Balay 
865dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
866bbb85fb3SSatish Balay {
867bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
86891c97fd4SSatish Balay   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
8696849ba73SBarry Smith   PetscErrorCode ierr;
870b24ad042SBarry Smith   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
871e44c0bd4SBarry Smith   PetscInt       *row,*col;
872ace3abfcSBarry Smith   PetscBool      r1,r2,r3,other_disassembled;
8733eda8832SBarry Smith   MatScalar      *val;
874b24ad042SBarry Smith   PetscMPIInt    n;
875bbb85fb3SSatish Balay 
876bbb85fb3SSatish Balay   PetscFunctionBegin;
8775fd66863SKarl Rupp   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
8784cb17eb5SBarry Smith   if (!baij->donotstash && !mat->nooffprocentries) {
879a2d1c673SSatish Balay     while (1) {
8808798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
881a2d1c673SSatish Balay       if (!flg) break;
882a2d1c673SSatish Balay 
883bbb85fb3SSatish Balay       for (i=0; i<n;) {
884bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
88526fbe8dcSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
88626fbe8dcSKarl Rupp           if (row[j] != rstart) break;
88726fbe8dcSKarl Rupp         }
888bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
889bbb85fb3SSatish Balay         else       ncols = n-i;
890bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
8914b4eb8d3SJed Brown         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
892bbb85fb3SSatish Balay         i    = j;
893bbb85fb3SSatish Balay       }
894bbb85fb3SSatish Balay     }
8958798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
896a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
897a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
898a2d1c673SSatish Balay        restore the original flags */
899a2d1c673SSatish Balay     r1 = baij->roworiented;
900a2d1c673SSatish Balay     r2 = a->roworiented;
90191c97fd4SSatish Balay     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
90226fbe8dcSKarl Rupp 
9037c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
9047c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
90526fbe8dcSKarl Rupp 
90691c97fd4SSatish Balay     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
907a2d1c673SSatish Balay     while (1) {
9088798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
909a2d1c673SSatish Balay       if (!flg) break;
910a2d1c673SSatish Balay 
911a2d1c673SSatish Balay       for (i=0; i<n;) {
912a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
91326fbe8dcSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
91426fbe8dcSKarl Rupp           if (row[j] != rstart) break;
91526fbe8dcSKarl Rupp         }
916a2d1c673SSatish Balay         if (j < n) ncols = j-i;
917a2d1c673SSatish Balay         else       ncols = n-i;
9184b4eb8d3SJed Brown         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
919a2d1c673SSatish Balay         i    = j;
920a2d1c673SSatish Balay       }
921a2d1c673SSatish Balay     }
9228798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
92326fbe8dcSKarl Rupp 
924a2d1c673SSatish Balay     baij->roworiented = r1;
925a2d1c673SSatish Balay     a->roworiented    = r2;
92626fbe8dcSKarl Rupp 
92791c97fd4SSatish Balay     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
928bbb85fb3SSatish Balay   }
929bbb85fb3SSatish Balay 
930bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
931bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
932bbb85fb3SSatish Balay 
933bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
934bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
935bbb85fb3SSatish Balay   /*
936bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
937bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
938bbb85fb3SSatish Balay   */
939bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
940b2566f29SBarry Smith     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
941bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
942ab9863d7SBarry Smith       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
943bbb85fb3SSatish Balay     }
944bbb85fb3SSatish Balay   }
945bbb85fb3SSatish Balay 
946bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
947bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
948bbb85fb3SSatish Balay   }
949bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
950bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
951bbb85fb3SSatish Balay 
9526cf91177SBarry Smith #if defined(PETSC_USE_INFO)
953bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
954abf3b562SBarry Smith     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
95526fbe8dcSKarl Rupp 
956bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
957bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
958bbb85fb3SSatish Balay   }
959bbb85fb3SSatish Balay #endif
960bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
961bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
96226fbe8dcSKarl Rupp 
963bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
964bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
965bbb85fb3SSatish Balay   }
966bbb85fb3SSatish Balay 
967fca92195SBarry Smith   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
96826fbe8dcSKarl Rupp 
969f4259b30SLisandro Dalcin   baij->rowvalues = NULL;
9704f9cfa9eSBarry Smith 
9714f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
9724f9cfa9eSBarry Smith   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
973e56f5c9eSBarry Smith     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
974b2566f29SBarry Smith     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
975e56f5c9eSBarry Smith   }
976bbb85fb3SSatish Balay   PetscFunctionReturn(0);
977bbb85fb3SSatish Balay }
97857b952d6SSatish Balay 
9797da1fb6eSBarry Smith extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
9809804daf3SBarry Smith #include <petscdraw.h>
9816849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
98257b952d6SSatish Balay {
98357b952d6SSatish Balay   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
984dfbe8321SBarry Smith   PetscErrorCode    ierr;
9857da1fb6eSBarry Smith   PetscMPIInt       rank = baij->rank;
986d0f46423SBarry Smith   PetscInt          bs   = mat->rmap->bs;
987ace3abfcSBarry Smith   PetscBool         iascii,isdraw;
988b0a32e0cSBarry Smith   PetscViewer       sviewer;
989f3ef73ceSBarry Smith   PetscViewerFormat format;
99057b952d6SSatish Balay 
991d64ed03dSBarry Smith   PetscFunctionBegin;
992251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
993251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
99432077d6dSBarry Smith   if (iascii) {
995b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
996456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
9974e220ebcSLois Curfman McInnes       MatInfo info;
998ce94432eSBarry Smith       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
999d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
10001575c14dSBarry Smith       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1001b1e9c6f1SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
1002b1e9c6f1SBarry Smith                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
1003d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1004e6dd01d4SJed Brown       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1005d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1006e6dd01d4SJed Brown       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1007b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
10081575c14dSBarry Smith       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
100907d81ca4SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
101057b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
10113a40ed3dSBarry Smith       PetscFunctionReturn(0);
1012fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
101377431f27SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
10143a40ed3dSBarry Smith       PetscFunctionReturn(0);
101504929863SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
101604929863SHong Zhang       PetscFunctionReturn(0);
101757b952d6SSatish Balay     }
101857b952d6SSatish Balay   }
101957b952d6SSatish Balay 
10200f5bd95cSBarry Smith   if (isdraw) {
1021b0a32e0cSBarry Smith     PetscDraw draw;
1022ace3abfcSBarry Smith     PetscBool isnull;
1023b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
102445f3bb6eSLisandro Dalcin     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
102545f3bb6eSLisandro Dalcin     if (isnull) PetscFunctionReturn(0);
102657b952d6SSatish Balay   }
102757b952d6SSatish Balay 
10287da1fb6eSBarry Smith   {
102957b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
103057b952d6SSatish Balay     Mat         A;
103157b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
1032d0f46423SBarry Smith     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
10333eda8832SBarry Smith     MatScalar   *a;
10343e219373SBarry Smith     const char  *matname;
103557b952d6SSatish Balay 
1036f204ca49SKris Buschelman     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1037f204ca49SKris Buschelman     /* Perhaps this should be the type of mat? */
1038ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
103957b952d6SSatish Balay     if (!rank) {
1040f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1041d64ed03dSBarry Smith     } else {
1042f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
104357b952d6SSatish Balay     }
1044f204ca49SKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
10450298fd71SBarry Smith     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
10462b82e772SSatish Balay     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
10473bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
104857b952d6SSatish Balay 
104957b952d6SSatish Balay     /* copy over the A part */
105057b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->A->data;
105157b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1052785e854fSJed Brown     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
105357b952d6SSatish Balay 
105457b952d6SSatish Balay     for (i=0; i<mbs; i++) {
1055899cda47SBarry Smith       rvals[0] = bs*(baij->rstartbs + i);
105626fbe8dcSKarl Rupp       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
105757b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1058899cda47SBarry Smith         col = (baij->cstartbs+aj[j])*bs;
105957b952d6SSatish Balay         for (k=0; k<bs; k++) {
106097e5c40aSBarry Smith           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1061cee3aa6bSSatish Balay           col++; a += bs;
106257b952d6SSatish Balay         }
106357b952d6SSatish Balay       }
106457b952d6SSatish Balay     }
106557b952d6SSatish Balay     /* copy over the B part */
106657b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
106757b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
106857b952d6SSatish Balay     for (i=0; i<mbs; i++) {
1069899cda47SBarry Smith       rvals[0] = bs*(baij->rstartbs + i);
107026fbe8dcSKarl Rupp       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
107157b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
107257b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
107357b952d6SSatish Balay         for (k=0; k<bs; k++) {
107497e5c40aSBarry Smith           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1075cee3aa6bSSatish Balay           col++; a += bs;
107657b952d6SSatish Balay         }
107757b952d6SSatish Balay       }
107857b952d6SSatish Balay     }
1079606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
10806d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10816d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
108255843e3eSBarry Smith     /*
108355843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1084b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
108555843e3eSBarry Smith     */
10863f08860eSBarry Smith     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1087ade3a672SBarry Smith     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
10883e219373SBarry Smith     if (!rank) {
1089ade3a672SBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
10907da1fb6eSBarry Smith       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
109157b952d6SSatish Balay     }
10923f08860eSBarry Smith     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
10931575c14dSBarry Smith     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
10946bf464f9SBarry Smith     ierr = MatDestroy(&A);CHKERRQ(ierr);
109557b952d6SSatish Balay   }
10963a40ed3dSBarry Smith   PetscFunctionReturn(0);
109757b952d6SSatish Balay }
109857b952d6SSatish Balay 
1099618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
1100b51a4376SLisandro Dalcin PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1101660746e0SBarry Smith {
1102b51a4376SLisandro Dalcin   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
1103b51a4376SLisandro Dalcin   Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
1104b51a4376SLisandro Dalcin   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
1105b51a4376SLisandro Dalcin   const PetscInt *garray = aij->garray;
1106b51a4376SLisandro Dalcin   PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
1107b51a4376SLisandro Dalcin   PetscInt       *rowlens,*colidxs;
1108b51a4376SLisandro Dalcin   PetscScalar    *matvals;
1109660746e0SBarry Smith   PetscErrorCode ierr;
1110660746e0SBarry Smith 
1111660746e0SBarry Smith   PetscFunctionBegin;
1112b51a4376SLisandro Dalcin   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1113b51a4376SLisandro Dalcin 
1114b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1115b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1116b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1117b51a4376SLisandro Dalcin   rs = mat->rmap->rstart;
1118b51a4376SLisandro Dalcin   cs = mat->cmap->rstart;
1119b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
1120b51a4376SLisandro Dalcin   nz = bs*bs*(A->nz + B->nz);
1121b51a4376SLisandro Dalcin 
1122b51a4376SLisandro Dalcin   /* write matrix header */
1123660746e0SBarry Smith   header[0] = MAT_FILE_CLASSID;
1124b51a4376SLisandro Dalcin   header[1] = M; header[2] = N; header[3] = nz;
1125ce94432eSBarry Smith   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1126b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1127660746e0SBarry Smith 
1128b51a4376SLisandro Dalcin   /* fill in and store row lengths */
1129b51a4376SLisandro Dalcin   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1130b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++)
1131b51a4376SLisandro Dalcin     for (j=0; j<bs; j++)
1132b51a4376SLisandro Dalcin       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
1133b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1134b51a4376SLisandro Dalcin   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1135660746e0SBarry Smith 
1136b51a4376SLisandro Dalcin   /* fill in and store column indices */
1137b51a4376SLisandro Dalcin   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1138b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++) {
1139b51a4376SLisandro Dalcin     for (k=0; k<bs; k++) {
1140b51a4376SLisandro Dalcin       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1141b51a4376SLisandro Dalcin         if (garray[B->j[jb]] > cs/bs) break;
1142b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1143b51a4376SLisandro Dalcin           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1144660746e0SBarry Smith       }
1145b51a4376SLisandro Dalcin       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1146b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1147b51a4376SLisandro Dalcin           colidxs[cnt++] = bs*A->j[ja] + l + cs;
1148b51a4376SLisandro Dalcin       for (; jb<B->i[i+1]; jb++)
1149b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1150b51a4376SLisandro Dalcin           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1151660746e0SBarry Smith     }
1152660746e0SBarry Smith   }
1153660746e0SBarry Smith   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1154b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
1155b51a4376SLisandro Dalcin   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1156660746e0SBarry Smith 
1157b51a4376SLisandro Dalcin   /* fill in and store nonzero values */
1158b51a4376SLisandro Dalcin   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1159b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++) {
1160b51a4376SLisandro Dalcin     for (k=0; k<bs; k++) {
1161b51a4376SLisandro Dalcin       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1162b51a4376SLisandro Dalcin         if (garray[B->j[jb]] > cs/bs) break;
1163b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1164b51a4376SLisandro Dalcin           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1165660746e0SBarry Smith       }
1166b51a4376SLisandro Dalcin       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1167b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1168b51a4376SLisandro Dalcin           matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
1169b51a4376SLisandro Dalcin       for (; jb<B->i[i+1]; jb++)
1170b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1171d21b9a37SPierre Jolivet           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1172660746e0SBarry Smith     }
1173b51a4376SLisandro Dalcin   }
1174b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
1175b51a4376SLisandro Dalcin   ierr = PetscFree(matvals);CHKERRQ(ierr);
1176660746e0SBarry Smith 
1177b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
1178b51a4376SLisandro Dalcin   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1179660746e0SBarry Smith   PetscFunctionReturn(0);
1180660746e0SBarry Smith }
1181660746e0SBarry Smith 
1182dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
118357b952d6SSatish Balay {
1184dfbe8321SBarry Smith   PetscErrorCode ierr;
1185ace3abfcSBarry Smith   PetscBool      iascii,isdraw,issocket,isbinary;
118657b952d6SSatish Balay 
1187d64ed03dSBarry Smith   PetscFunctionBegin;
1188251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1189251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1190251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1191251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1192660746e0SBarry Smith   if (iascii || isdraw || issocket) {
11937b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1194660746e0SBarry Smith   } else if (isbinary) {
1195660746e0SBarry Smith     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
119657b952d6SSatish Balay   }
11973a40ed3dSBarry Smith   PetscFunctionReturn(0);
119857b952d6SSatish Balay }
119957b952d6SSatish Balay 
1200dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
120179bdfe76SSatish Balay {
120279bdfe76SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1203dfbe8321SBarry Smith   PetscErrorCode ierr;
120479bdfe76SSatish Balay 
1205d64ed03dSBarry Smith   PetscFunctionBegin;
1206aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1207d0f46423SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
120879bdfe76SSatish Balay #endif
12098798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
12108798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
12116bf464f9SBarry Smith   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
12126bf464f9SBarry Smith   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1213aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
12146bc0bbbfSBarry Smith   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
121548e59246SSatish Balay #else
121605b42c5fSBarry Smith   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
121748e59246SSatish Balay #endif
121805b42c5fSBarry Smith   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
12196bf464f9SBarry Smith   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
12206bf464f9SBarry Smith   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1221fca92195SBarry Smith   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
122205b42c5fSBarry Smith   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1223fca92195SBarry Smith   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1224899cda47SBarry Smith   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1225bf0cc555SLisandro Dalcin   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1226901853e0SKris Buschelman 
1227f4259b30SLisandro Dalcin   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1228bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1229bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1230bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1231bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1232bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1233bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1234bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1235bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
12367ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
12377ea3e4caSstefano_zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
12387ea3e4caSstefano_zampini #endif
1239c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
12403a40ed3dSBarry Smith   PetscFunctionReturn(0);
124179bdfe76SSatish Balay }
124279bdfe76SSatish Balay 
1243dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1244cee3aa6bSSatish Balay {
1245cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1246dfbe8321SBarry Smith   PetscErrorCode ierr;
1247b24ad042SBarry Smith   PetscInt       nt;
1248cee3aa6bSSatish Balay 
1249d64ed03dSBarry Smith   PetscFunctionBegin;
1250e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1251e7e72b3dSBarry Smith   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1252e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1253e7e72b3dSBarry Smith   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1254ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1255f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1256ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1257f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
12583a40ed3dSBarry Smith   PetscFunctionReturn(0);
1259cee3aa6bSSatish Balay }
1260cee3aa6bSSatish Balay 
1261dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1262cee3aa6bSSatish Balay {
1263cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1264dfbe8321SBarry Smith   PetscErrorCode ierr;
1265d64ed03dSBarry Smith 
1266d64ed03dSBarry Smith   PetscFunctionBegin;
1267ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1269ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1270f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
12713a40ed3dSBarry Smith   PetscFunctionReturn(0);
1272cee3aa6bSSatish Balay }
1273cee3aa6bSSatish Balay 
1274dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1275cee3aa6bSSatish Balay {
1276cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1277dfbe8321SBarry Smith   PetscErrorCode ierr;
1278cee3aa6bSSatish Balay 
1279d64ed03dSBarry Smith   PetscFunctionBegin;
1280cee3aa6bSSatish Balay   /* do nondiagonal part */
12817c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1282cee3aa6bSSatish Balay   /* do local part */
12837c922b88SBarry Smith   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1284e4a140f6SJunchao Zhang   /* add partial results together */
1285ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1286ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
12873a40ed3dSBarry Smith   PetscFunctionReturn(0);
1288cee3aa6bSSatish Balay }
1289cee3aa6bSSatish Balay 
1290dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1291cee3aa6bSSatish Balay {
1292cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1293dfbe8321SBarry Smith   PetscErrorCode ierr;
1294cee3aa6bSSatish Balay 
1295d64ed03dSBarry Smith   PetscFunctionBegin;
1296cee3aa6bSSatish Balay   /* do nondiagonal part */
12977c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1298cee3aa6bSSatish Balay   /* do local part */
12997c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1300e4a140f6SJunchao Zhang   /* add partial results together */
1301e4a140f6SJunchao Zhang   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1302ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
13033a40ed3dSBarry Smith   PetscFunctionReturn(0);
1304cee3aa6bSSatish Balay }
1305cee3aa6bSSatish Balay 
1306cee3aa6bSSatish Balay /*
1307cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1308cee3aa6bSSatish Balay    diagonal block
1309cee3aa6bSSatish Balay */
1310dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1311cee3aa6bSSatish Balay {
1312cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1313dfbe8321SBarry Smith   PetscErrorCode ierr;
1314d64ed03dSBarry Smith 
1315d64ed03dSBarry Smith   PetscFunctionBegin;
1316e32f2f54SBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
13173a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
13183a40ed3dSBarry Smith   PetscFunctionReturn(0);
1319cee3aa6bSSatish Balay }
1320cee3aa6bSSatish Balay 
1321f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1322cee3aa6bSSatish Balay {
1323cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1324dfbe8321SBarry Smith   PetscErrorCode ierr;
1325d64ed03dSBarry Smith 
1326d64ed03dSBarry Smith   PetscFunctionBegin;
1327f4df32b1SMatthew Knepley   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1328f4df32b1SMatthew Knepley   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
13293a40ed3dSBarry Smith   PetscFunctionReturn(0);
1330cee3aa6bSSatish Balay }
1331026e39d0SSatish Balay 
1332b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1333acdf5bf4SSatish Balay {
1334acdf5bf4SSatish Balay   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
133587828ca2SBarry Smith   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
13366849ba73SBarry Smith   PetscErrorCode ierr;
1337d0f46423SBarry Smith   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1338d0f46423SBarry Smith   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1339899cda47SBarry Smith   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1340acdf5bf4SSatish Balay 
1341d64ed03dSBarry Smith   PetscFunctionBegin;
1342e7e72b3dSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1343e32f2f54SBarry Smith   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1344acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1345acdf5bf4SSatish Balay 
1346acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1347acdf5bf4SSatish Balay     /*
1348acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1349acdf5bf4SSatish Balay     */
1350acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1351b24ad042SBarry Smith     PetscInt    max = 1,mbs = mat->mbs,tmp;
1352bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1353acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
135426fbe8dcSKarl Rupp       if (max < tmp) max = tmp;
1355acdf5bf4SSatish Balay     }
1356dcca6d9dSJed Brown     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1357acdf5bf4SSatish Balay   }
1358d9d09a02SSatish Balay   lrow = row - brstart;
1359acdf5bf4SSatish Balay 
1360acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1361f4259b30SLisandro Dalcin   if (!v)   {pvA = NULL; pvB = NULL;}
1362f4259b30SLisandro Dalcin   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1363f830108cSBarry Smith   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1364f830108cSBarry Smith   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1365acdf5bf4SSatish Balay   nztot = nzA + nzB;
1366acdf5bf4SSatish Balay 
1367acdf5bf4SSatish Balay   cmap = mat->garray;
1368acdf5bf4SSatish Balay   if (v  || idx) {
1369acdf5bf4SSatish Balay     if (nztot) {
1370acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1371b24ad042SBarry Smith       PetscInt imark = -1;
1372acdf5bf4SSatish Balay       if (v) {
1373acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1374acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1375d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1376acdf5bf4SSatish Balay           else break;
1377acdf5bf4SSatish Balay         }
1378acdf5bf4SSatish Balay         imark = i;
1379acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1380acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1381acdf5bf4SSatish Balay       }
1382acdf5bf4SSatish Balay       if (idx) {
1383acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1384acdf5bf4SSatish Balay         if (imark > -1) {
1385acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1386bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1387acdf5bf4SSatish Balay           }
1388acdf5bf4SSatish Balay         } else {
1389acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
139026fbe8dcSKarl Rupp             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1391acdf5bf4SSatish Balay             else break;
1392acdf5bf4SSatish Balay           }
1393acdf5bf4SSatish Balay           imark = i;
1394acdf5bf4SSatish Balay         }
1395d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1396d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1397acdf5bf4SSatish Balay       }
1398d64ed03dSBarry Smith     } else {
1399f4259b30SLisandro Dalcin       if (idx) *idx = NULL;
1400f4259b30SLisandro Dalcin       if (v)   *v   = NULL;
1401d212a18eSSatish Balay     }
1402acdf5bf4SSatish Balay   }
1403acdf5bf4SSatish Balay   *nz  = nztot;
1404f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1405f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
14063a40ed3dSBarry Smith   PetscFunctionReturn(0);
1407acdf5bf4SSatish Balay }
1408acdf5bf4SSatish Balay 
1409b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1410acdf5bf4SSatish Balay {
1411acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1412d64ed03dSBarry Smith 
1413d64ed03dSBarry Smith   PetscFunctionBegin;
1414e7e72b3dSBarry Smith   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1415acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
14163a40ed3dSBarry Smith   PetscFunctionReturn(0);
1417acdf5bf4SSatish Balay }
1418acdf5bf4SSatish Balay 
1419dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
142058667388SSatish Balay {
142158667388SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1422dfbe8321SBarry Smith   PetscErrorCode ierr;
1423d64ed03dSBarry Smith 
1424d64ed03dSBarry Smith   PetscFunctionBegin;
142558667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
142658667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
14273a40ed3dSBarry Smith   PetscFunctionReturn(0);
142858667388SSatish Balay }
14290ac07820SSatish Balay 
1430dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14310ac07820SSatish Balay {
14324e220ebcSLois Curfman McInnes   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
14334e220ebcSLois Curfman McInnes   Mat            A  = a->A,B = a->B;
1434dfbe8321SBarry Smith   PetscErrorCode ierr;
14353966268fSBarry Smith   PetscLogDouble isend[5],irecv[5];
14360ac07820SSatish Balay 
1437d64ed03dSBarry Smith   PetscFunctionBegin;
1438d0f46423SBarry Smith   info->block_size = (PetscReal)matin->rmap->bs;
143926fbe8dcSKarl Rupp 
14404e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
144126fbe8dcSKarl Rupp 
14420e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1443de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
144426fbe8dcSKarl Rupp 
14454e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
144626fbe8dcSKarl Rupp 
14470e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1448de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
144926fbe8dcSKarl Rupp 
14500ac07820SSatish Balay   if (flag == MAT_LOCAL) {
14514e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
14524e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
14534e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
14544e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14554e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14560ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
14573966268fSBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
145826fbe8dcSKarl Rupp 
14594e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14604e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14614e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14624e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14634e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14640ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
14653966268fSBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
146626fbe8dcSKarl Rupp 
14674e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14684e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14694e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14704e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14714e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1472ce94432eSBarry Smith   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
14734e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
14744e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
14754e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
14763a40ed3dSBarry Smith   PetscFunctionReturn(0);
14770ac07820SSatish Balay }
14780ac07820SSatish Balay 
1479ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
148058667388SSatish Balay {
148158667388SSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1482dfbe8321SBarry Smith   PetscErrorCode ierr;
148358667388SSatish Balay 
1484d64ed03dSBarry Smith   PetscFunctionBegin;
148512c028f9SKris Buschelman   switch (op) {
1486512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
148712c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
148828b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1489a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
149012c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
149143674050SBarry Smith     MatCheckPreallocated(A,1);
14924e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
14934e0d8c25SBarry Smith     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
149412c028f9SKris Buschelman     break;
149512c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
149643674050SBarry Smith     MatCheckPreallocated(A,1);
14974e0d8c25SBarry Smith     a->roworiented = flg;
149826fbe8dcSKarl Rupp 
14994e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
15004e0d8c25SBarry Smith     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
150112c028f9SKris Buschelman     break;
15024e0d8c25SBarry Smith   case MAT_NEW_DIAGONALS:
1503071fcb05SBarry Smith   case MAT_SORTED_FULL:
1504290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
150512c028f9SKris Buschelman     break;
150612c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
15074e0d8c25SBarry Smith     a->donotstash = flg;
150812c028f9SKris Buschelman     break;
150912c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
15104e0d8c25SBarry Smith     a->ht_flag = flg;
1511abf3b562SBarry Smith     a->ht_fact = 1.39;
151212c028f9SKris Buschelman     break;
151377e54ba9SKris Buschelman   case MAT_SYMMETRIC:
151477e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
15152188ac68SBarry Smith   case MAT_HERMITIAN:
1516c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
15172188ac68SBarry Smith   case MAT_SYMMETRY_ETERNAL:
151843674050SBarry Smith     MatCheckPreallocated(A,1);
15194e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
152077e54ba9SKris Buschelman     break;
152112c028f9SKris Buschelman   default:
1522ce94432eSBarry Smith     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1523d64ed03dSBarry Smith   }
15243a40ed3dSBarry Smith   PetscFunctionReturn(0);
152558667388SSatish Balay }
152658667388SSatish Balay 
1527fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
15280ac07820SSatish Balay {
15290ac07820SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
15300ac07820SSatish Balay   Mat_SeqBAIJ    *Aloc;
15310ac07820SSatish Balay   Mat            B;
1532dfbe8321SBarry Smith   PetscErrorCode ierr;
1533d0f46423SBarry Smith   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1534d0f46423SBarry Smith   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
15353eda8832SBarry Smith   MatScalar      *a;
15360ac07820SSatish Balay 
1537d64ed03dSBarry Smith   PetscFunctionBegin;
1538cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1539ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1540d0f46423SBarry Smith     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
15417adad957SLisandro Dalcin     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
15422e72b8d9SBarry Smith     /* Do not know preallocation information, but must set block size */
15430298fd71SBarry Smith     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1544fc4dec0aSBarry Smith   } else {
1545fc4dec0aSBarry Smith     B = *matout;
1546fc4dec0aSBarry Smith   }
15470ac07820SSatish Balay 
15480ac07820SSatish Balay   /* copy over the A part */
15490ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
15500ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1551785e854fSJed Brown   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
15520ac07820SSatish Balay 
15530ac07820SSatish Balay   for (i=0; i<mbs; i++) {
1554899cda47SBarry Smith     rvals[0] = bs*(baij->rstartbs + i);
155526fbe8dcSKarl Rupp     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
15560ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1557899cda47SBarry Smith       col = (baij->cstartbs+aj[j])*bs;
15580ac07820SSatish Balay       for (k=0; k<bs; k++) {
155997e5c40aSBarry Smith         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
156026fbe8dcSKarl Rupp 
15610ac07820SSatish Balay         col++; a += bs;
15620ac07820SSatish Balay       }
15630ac07820SSatish Balay     }
15640ac07820SSatish Balay   }
15650ac07820SSatish Balay   /* copy over the B part */
15660ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
15670ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
15680ac07820SSatish Balay   for (i=0; i<mbs; i++) {
1569899cda47SBarry Smith     rvals[0] = bs*(baij->rstartbs + i);
157026fbe8dcSKarl Rupp     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
15710ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
15720ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
15730ac07820SSatish Balay       for (k=0; k<bs; k++) {
157497e5c40aSBarry Smith         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
157526fbe8dcSKarl Rupp         col++;
157626fbe8dcSKarl Rupp         a += bs;
15770ac07820SSatish Balay       }
15780ac07820SSatish Balay     }
15790ac07820SSatish Balay   }
1580606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
15810ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15820ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15830ac07820SSatish Balay 
1584cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
158526fbe8dcSKarl Rupp   else {
158628be2f97SBarry Smith     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
15870ac07820SSatish Balay   }
15883a40ed3dSBarry Smith   PetscFunctionReturn(0);
15890ac07820SSatish Balay }
15900e95ebc0SSatish Balay 
1591dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
15920e95ebc0SSatish Balay {
159336c4a09eSSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
159436c4a09eSSatish Balay   Mat            a     = baij->A,b = baij->B;
1595dfbe8321SBarry Smith   PetscErrorCode ierr;
1596b24ad042SBarry Smith   PetscInt       s1,s2,s3;
15970e95ebc0SSatish Balay 
1598d64ed03dSBarry Smith   PetscFunctionBegin;
159936c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
160036c4a09eSSatish Balay   if (rr) {
160136c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1602e32f2f54SBarry Smith     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
160336c4a09eSSatish Balay     /* Overlap communication with computation. */
1604ca9f406cSSatish Balay     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
160536c4a09eSSatish Balay   }
16060e95ebc0SSatish Balay   if (ll) {
16070e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1608e32f2f54SBarry Smith     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
16090298fd71SBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
16100e95ebc0SSatish Balay   }
161136c4a09eSSatish Balay   /* scale  the diagonal block */
161236c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
161336c4a09eSSatish Balay 
161436c4a09eSSatish Balay   if (rr) {
161536c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
1616ca9f406cSSatish Balay     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
16170298fd71SBarry Smith     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
161836c4a09eSSatish Balay   }
16193a40ed3dSBarry Smith   PetscFunctionReturn(0);
16200e95ebc0SSatish Balay }
16210e95ebc0SSatish Balay 
16222b40b63fSBarry Smith PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
16230ac07820SSatish Balay {
16240ac07820SSatish Balay   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
162565a92638SMatthew G. Knepley   PetscInt      *lrows;
16266e520ac8SStefano Zampini   PetscInt       r, len;
162794342113SStefano Zampini   PetscBool      cong;
16286849ba73SBarry Smith   PetscErrorCode ierr;
16290ac07820SSatish Balay 
1630d64ed03dSBarry Smith   PetscFunctionBegin;
16316e520ac8SStefano Zampini   /* get locally owned rows */
16326e520ac8SStefano Zampini   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
163397b48c8fSBarry Smith   /* fix right hand side if needed */
163497b48c8fSBarry Smith   if (x && b) {
163565a92638SMatthew G. Knepley     const PetscScalar *xx;
163665a92638SMatthew G. Knepley     PetscScalar       *bb;
163765a92638SMatthew G. Knepley 
163897b48c8fSBarry Smith     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
163997b48c8fSBarry Smith     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
164065a92638SMatthew G. Knepley     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
164197b48c8fSBarry Smith     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
164297b48c8fSBarry Smith     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
164397b48c8fSBarry Smith   }
164497b48c8fSBarry Smith 
16450ac07820SSatish Balay   /* actually zap the local rows */
164672dacd9aSBarry Smith   /*
164772dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
1648a8c7a070SBarry Smith      is square and the user wishes to set the diagonal we use separate
164972dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
165072dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
165172dacd9aSBarry Smith 
165272dacd9aSBarry Smith   */
16539c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1654a34163a4SJed Brown   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
165594342113SStefano Zampini   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
165694342113SStefano Zampini   if ((diag != 0.0) && cong) {
1657a34163a4SJed Brown     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1658f4df32b1SMatthew Knepley   } else if (diag != 0.0) {
1659f4259b30SLisandro Dalcin     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1660e7e72b3dSBarry Smith     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1661512a5fc5SBarry Smith        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
166265a92638SMatthew G. Knepley     for (r = 0; r < len; ++r) {
166365a92638SMatthew G. Knepley       const PetscInt row = lrows[r] + A->rmap->rstart;
1664f4df32b1SMatthew Knepley       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1665a07cd24cSSatish Balay     }
1666a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16689c957beeSSatish Balay   } else {
1669a34163a4SJed Brown     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1670a07cd24cSSatish Balay   }
1671606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
16724f9cfa9eSBarry Smith 
16734f9cfa9eSBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
16744f9cfa9eSBarry Smith   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1675e56f5c9eSBarry Smith     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1676b2566f29SBarry Smith     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1677e56f5c9eSBarry Smith   }
16783a40ed3dSBarry Smith   PetscFunctionReturn(0);
16790ac07820SSatish Balay }
168072dacd9aSBarry Smith 
16816f0a72daSMatthew G. Knepley PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
16826f0a72daSMatthew G. Knepley {
16836f0a72daSMatthew G. Knepley   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
16846f0a72daSMatthew G. Knepley   PetscErrorCode    ierr;
1685131c27b5Sprj-   PetscMPIInt       n = A->rmap->n,p = 0;
1686131c27b5Sprj-   PetscInt          i,j,k,r,len = 0,row,col,count;
16876f0a72daSMatthew G. Knepley   PetscInt          *lrows,*owners = A->rmap->range;
16886f0a72daSMatthew G. Knepley   PetscSFNode       *rrows;
16896f0a72daSMatthew G. Knepley   PetscSF           sf;
16906f0a72daSMatthew G. Knepley   const PetscScalar *xx;
16916f0a72daSMatthew G. Knepley   PetscScalar       *bb,*mask;
16926f0a72daSMatthew G. Knepley   Vec               xmask,lmask;
16936f0a72daSMatthew G. Knepley   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
16946f0a72daSMatthew G. Knepley   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
16956f0a72daSMatthew G. Knepley   PetscScalar       *aa;
16966f0a72daSMatthew G. Knepley 
16976f0a72daSMatthew G. Knepley   PetscFunctionBegin;
16986f0a72daSMatthew G. Knepley   /* Create SF where leaves are input rows and roots are owned rows */
16996f0a72daSMatthew G. Knepley   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
17006f0a72daSMatthew G. Knepley   for (r = 0; r < n; ++r) lrows[r] = -1;
17016f0a72daSMatthew G. Knepley   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
17026f0a72daSMatthew G. Knepley   for (r = 0; r < N; ++r) {
17036f0a72daSMatthew G. Knepley     const PetscInt idx   = rows[r];
17045ba17502SJed Brown     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
17055ba17502SJed Brown     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
17065ba17502SJed Brown       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
17075ba17502SJed Brown     }
17086f0a72daSMatthew G. Knepley     rrows[r].rank  = p;
17096f0a72daSMatthew G. Knepley     rrows[r].index = rows[r] - owners[p];
17106f0a72daSMatthew G. Knepley   }
17116f0a72daSMatthew G. Knepley   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
17126f0a72daSMatthew G. Knepley   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
17136f0a72daSMatthew G. Knepley   /* Collect flags for rows to be zeroed */
17146f0a72daSMatthew G. Knepley   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
17156f0a72daSMatthew G. Knepley   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
17166f0a72daSMatthew G. Knepley   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
17176f0a72daSMatthew G. Knepley   /* Compress and put in row numbers */
17186f0a72daSMatthew G. Knepley   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
17196f0a72daSMatthew G. Knepley   /* zero diagonal part of matrix */
17206f0a72daSMatthew G. Knepley   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
17216f0a72daSMatthew G. Knepley   /* handle off diagonal part of matrix */
17222a7a6963SBarry Smith   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
17236f0a72daSMatthew G. Knepley   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
17246f0a72daSMatthew G. Knepley   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
17256f0a72daSMatthew G. Knepley   for (i=0; i<len; i++) bb[lrows[i]] = 1;
17266f0a72daSMatthew G. Knepley   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
17276f0a72daSMatthew G. Knepley   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
17286f0a72daSMatthew G. Knepley   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
17296f0a72daSMatthew G. Knepley   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
17306f0a72daSMatthew G. Knepley   if (x) {
17316f0a72daSMatthew G. Knepley     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
17326f0a72daSMatthew G. Knepley     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
17336f0a72daSMatthew G. Knepley     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
17346f0a72daSMatthew G. Knepley     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
17356f0a72daSMatthew G. Knepley   }
17366f0a72daSMatthew G. Knepley   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
17376f0a72daSMatthew G. Knepley   /* remove zeroed rows of off diagonal matrix */
17386f0a72daSMatthew G. Knepley   for (i = 0; i < len; ++i) {
17396f0a72daSMatthew G. Knepley     row   = lrows[i];
17406f0a72daSMatthew G. Knepley     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
17416f0a72daSMatthew G. Knepley     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
17426f0a72daSMatthew G. Knepley     for (k = 0; k < count; ++k) {
17436f0a72daSMatthew G. Knepley       aa[0] = 0.0;
17446f0a72daSMatthew G. Knepley       aa   += bs;
17456f0a72daSMatthew G. Knepley     }
17466f0a72daSMatthew G. Knepley   }
17476f0a72daSMatthew G. Knepley   /* loop over all elements of off process part of matrix zeroing removed columns*/
17486f0a72daSMatthew G. Knepley   for (i = 0; i < l->B->rmap->N; ++i) {
17496f0a72daSMatthew G. Knepley     row = i/bs;
17506f0a72daSMatthew G. Knepley     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
17516f0a72daSMatthew G. Knepley       for (k = 0; k < bs; ++k) {
17526f0a72daSMatthew G. Knepley         col = bs*baij->j[j] + k;
17536f0a72daSMatthew G. Knepley         if (PetscAbsScalar(mask[col])) {
17546f0a72daSMatthew G. Knepley           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
175589ae1891SBarry Smith           if (x) bb[i] -= aa[0]*xx[col];
17566f0a72daSMatthew G. Knepley           aa[0] = 0.0;
17576f0a72daSMatthew G. Knepley         }
17586f0a72daSMatthew G. Knepley       }
17596f0a72daSMatthew G. Knepley     }
17606f0a72daSMatthew G. Knepley   }
17616f0a72daSMatthew G. Knepley   if (x) {
17626f0a72daSMatthew G. Knepley     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
17636f0a72daSMatthew G. Knepley     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
17646f0a72daSMatthew G. Knepley   }
17656f0a72daSMatthew G. Knepley   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
17666f0a72daSMatthew G. Knepley   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
17676f0a72daSMatthew G. Knepley   ierr = PetscFree(lrows);CHKERRQ(ierr);
17684f9cfa9eSBarry Smith 
17694f9cfa9eSBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
17704f9cfa9eSBarry Smith   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
17714f9cfa9eSBarry Smith     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1772b2566f29SBarry Smith     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
17734f9cfa9eSBarry Smith   }
17746f0a72daSMatthew G. Knepley   PetscFunctionReturn(0);
17756f0a72daSMatthew G. Knepley }
17766f0a72daSMatthew G. Knepley 
1777dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1778bb5a7306SBarry Smith {
1779bb5a7306SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1780dfbe8321SBarry Smith   PetscErrorCode ierr;
1781d64ed03dSBarry Smith 
1782d64ed03dSBarry Smith   PetscFunctionBegin;
1783bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
17843a40ed3dSBarry Smith   PetscFunctionReturn(0);
1785bb5a7306SBarry Smith }
1786bb5a7306SBarry Smith 
17876849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
17880ac07820SSatish Balay 
1789ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
17907fc3c18eSBarry Smith {
17917fc3c18eSBarry Smith   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
17927fc3c18eSBarry Smith   Mat            a,b,c,d;
1793ace3abfcSBarry Smith   PetscBool      flg;
1794dfbe8321SBarry Smith   PetscErrorCode ierr;
17957fc3c18eSBarry Smith 
17967fc3c18eSBarry Smith   PetscFunctionBegin;
17977fc3c18eSBarry Smith   a = matA->A; b = matA->B;
17987fc3c18eSBarry Smith   c = matB->A; d = matB->B;
17997fc3c18eSBarry Smith 
18007fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1801abc0a331SBarry Smith   if (flg) {
18027fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
18037fc3c18eSBarry Smith   }
1804b2566f29SBarry Smith   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
18057fc3c18eSBarry Smith   PetscFunctionReturn(0);
18067fc3c18eSBarry Smith }
18077fc3c18eSBarry Smith 
18083c896bc6SHong Zhang PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
18093c896bc6SHong Zhang {
18103c896bc6SHong Zhang   PetscErrorCode ierr;
18113c896bc6SHong Zhang   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
18123c896bc6SHong Zhang   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
18133c896bc6SHong Zhang 
18143c896bc6SHong Zhang   PetscFunctionBegin;
18153c896bc6SHong Zhang   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
18163c896bc6SHong Zhang   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
18173c896bc6SHong Zhang     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
18183c896bc6SHong Zhang   } else {
18193c896bc6SHong Zhang     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
18203c896bc6SHong Zhang     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
18213c896bc6SHong Zhang   }
1822cdc753b6SBarry Smith   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
18233c896bc6SHong Zhang   PetscFunctionReturn(0);
18243c896bc6SHong Zhang }
1825273d9f13SBarry Smith 
18264994cf47SJed Brown PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1827273d9f13SBarry Smith {
1828dfbe8321SBarry Smith   PetscErrorCode ierr;
1829273d9f13SBarry Smith 
1830273d9f13SBarry Smith   PetscFunctionBegin;
1831f4259b30SLisandro Dalcin   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
1832273d9f13SBarry Smith   PetscFunctionReturn(0);
1833273d9f13SBarry Smith }
1834273d9f13SBarry Smith 
18354de5dceeSHong Zhang PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
18364de5dceeSHong Zhang {
1837001ddc4fSHong Zhang   PetscErrorCode ierr;
1838001ddc4fSHong Zhang   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
18394de5dceeSHong Zhang   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
18404de5dceeSHong Zhang   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
18414de5dceeSHong Zhang 
18424de5dceeSHong Zhang   PetscFunctionBegin;
1843001ddc4fSHong Zhang   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
18444de5dceeSHong Zhang   PetscFunctionReturn(0);
18454de5dceeSHong Zhang }
18464de5dceeSHong Zhang 
18474fe895cdSHong Zhang PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
18484fe895cdSHong Zhang {
18494fe895cdSHong Zhang   PetscErrorCode ierr;
18504fe895cdSHong Zhang   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
18514fe895cdSHong Zhang   PetscBLASInt   bnz,one=1;
18524fe895cdSHong Zhang   Mat_SeqBAIJ    *x,*y;
1853b31f67cfSBarry Smith   PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;
18544fe895cdSHong Zhang 
18554fe895cdSHong Zhang   PetscFunctionBegin;
18564fe895cdSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
18574fe895cdSHong Zhang     PetscScalar alpha = a;
18584fe895cdSHong Zhang     x    = (Mat_SeqBAIJ*)xx->A->data;
18594fe895cdSHong Zhang     y    = (Mat_SeqBAIJ*)yy->A->data;
1860b31f67cfSBarry Smith     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
18618b83055fSJed Brown     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
18624fe895cdSHong Zhang     x    = (Mat_SeqBAIJ*)xx->B->data;
18634fe895cdSHong Zhang     y    = (Mat_SeqBAIJ*)yy->B->data;
1864b31f67cfSBarry Smith     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
18658b83055fSJed Brown     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1866a3fa217bSJose E. Roman     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1867ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1868ab784542SHong Zhang     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
18694fe895cdSHong Zhang   } else {
18704de5dceeSHong Zhang     Mat      B;
18714de5dceeSHong Zhang     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
18724de5dceeSHong Zhang     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
18734de5dceeSHong Zhang     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
18744de5dceeSHong Zhang     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
18754de5dceeSHong Zhang     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
18764de5dceeSHong Zhang     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
18774de5dceeSHong Zhang     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
18784de5dceeSHong Zhang     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
18794de5dceeSHong Zhang     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
18804de5dceeSHong Zhang     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
18814de5dceeSHong Zhang     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
18824de5dceeSHong Zhang     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
18834de5dceeSHong Zhang     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
188428be2f97SBarry Smith     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
18854de5dceeSHong Zhang     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
18864de5dceeSHong Zhang     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
18874fe895cdSHong Zhang   }
18884fe895cdSHong Zhang   PetscFunctionReturn(0);
18894fe895cdSHong Zhang }
18904fe895cdSHong Zhang 
189199cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
189299cafbc1SBarry Smith {
189399cafbc1SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
189499cafbc1SBarry Smith   PetscErrorCode ierr;
189599cafbc1SBarry Smith 
189699cafbc1SBarry Smith   PetscFunctionBegin;
189799cafbc1SBarry Smith   ierr = MatRealPart(a->A);CHKERRQ(ierr);
189899cafbc1SBarry Smith   ierr = MatRealPart(a->B);CHKERRQ(ierr);
189999cafbc1SBarry Smith   PetscFunctionReturn(0);
190099cafbc1SBarry Smith }
190199cafbc1SBarry Smith 
190299cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
190399cafbc1SBarry Smith {
190499cafbc1SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
190599cafbc1SBarry Smith   PetscErrorCode ierr;
190699cafbc1SBarry Smith 
190799cafbc1SBarry Smith   PetscFunctionBegin;
190899cafbc1SBarry Smith   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
190999cafbc1SBarry Smith   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
191099cafbc1SBarry Smith   PetscFunctionReturn(0);
191199cafbc1SBarry Smith }
191299cafbc1SBarry Smith 
19137dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
19144aa3045dSJed Brown {
19154aa3045dSJed Brown   PetscErrorCode ierr;
19164aa3045dSJed Brown   IS             iscol_local;
19174aa3045dSJed Brown   PetscInt       csize;
19184aa3045dSJed Brown 
19194aa3045dSJed Brown   PetscFunctionBegin;
19204aa3045dSJed Brown   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1921b79d0421SJed Brown   if (call == MAT_REUSE_MATRIX) {
1922b79d0421SJed Brown     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1923e32f2f54SBarry Smith     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1924b79d0421SJed Brown   } else {
19254aa3045dSJed Brown     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1926b79d0421SJed Brown   }
19277dae84e0SHong Zhang   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1928b79d0421SJed Brown   if (call == MAT_INITIAL_MATRIX) {
1929b79d0421SJed Brown     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
19306bf464f9SBarry Smith     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1931b79d0421SJed Brown   }
19324aa3045dSJed Brown   PetscFunctionReturn(0);
19334aa3045dSJed Brown }
193417df9f7cSHong Zhang 
193582094794SBarry Smith /*
193682094794SBarry Smith   Not great since it makes two copies of the submatrix, first an SeqBAIJ
193782094794SBarry Smith   in local and then by concatenating the local matrices the end result.
19387dae84e0SHong Zhang   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
19398f46ffcaSHong Zhang   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
194082094794SBarry Smith */
19417dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
194282094794SBarry Smith {
194382094794SBarry Smith   PetscErrorCode ierr;
194482094794SBarry Smith   PetscMPIInt    rank,size;
194582094794SBarry Smith   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1946c9ffca76SHong Zhang   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
194729dcf524SDmitry Karpeev   Mat            M,Mreuse;
194882094794SBarry Smith   MatScalar      *vwork,*aa;
1949ce94432eSBarry Smith   MPI_Comm       comm;
195029dcf524SDmitry Karpeev   IS             isrow_new, iscol_new;
195182094794SBarry Smith   Mat_SeqBAIJ    *aij;
195282094794SBarry Smith 
195382094794SBarry Smith   PetscFunctionBegin;
1954ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
195582094794SBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
195682094794SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
195729dcf524SDmitry Karpeev   /* The compression and expansion should be avoided. Doesn't point
195829dcf524SDmitry Karpeev      out errors, might change the indices, hence buggey */
195929dcf524SDmitry Karpeev   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
196029dcf524SDmitry Karpeev   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
196182094794SBarry Smith 
196282094794SBarry Smith   if (call ==  MAT_REUSE_MATRIX) {
196382094794SBarry Smith     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
1964e32f2f54SBarry Smith     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
19657dae84e0SHong Zhang     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
196682094794SBarry Smith   } else {
19677dae84e0SHong Zhang     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
196882094794SBarry Smith   }
196929dcf524SDmitry Karpeev   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
197029dcf524SDmitry Karpeev   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
197182094794SBarry Smith   /*
197282094794SBarry Smith       m - number of local rows
197382094794SBarry Smith       n - number of columns (same on all processors)
197482094794SBarry Smith       rstart - first row in new global matrix generated
197582094794SBarry Smith   */
197682094794SBarry Smith   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
197782094794SBarry Smith   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
197882094794SBarry Smith   m    = m/bs;
197982094794SBarry Smith   n    = n/bs;
198082094794SBarry Smith 
198182094794SBarry Smith   if (call == MAT_INITIAL_MATRIX) {
198282094794SBarry Smith     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
198382094794SBarry Smith     ii  = aij->i;
198482094794SBarry Smith     jj  = aij->j;
198582094794SBarry Smith 
198682094794SBarry Smith     /*
198782094794SBarry Smith         Determine the number of non-zeros in the diagonal and off-diagonal
198882094794SBarry Smith         portions of the matrix in order to do correct preallocation
198982094794SBarry Smith     */
199082094794SBarry Smith 
199182094794SBarry Smith     /* first get start and end of "diagonal" columns */
199282094794SBarry Smith     if (csize == PETSC_DECIDE) {
199382094794SBarry Smith       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
199482094794SBarry Smith       if (mglobal == n*bs) { /* square matrix */
199582094794SBarry Smith         nlocal = m;
199682094794SBarry Smith       } else {
199782094794SBarry Smith         nlocal = n/size + ((n % size) > rank);
199882094794SBarry Smith       }
199982094794SBarry Smith     } else {
200082094794SBarry Smith       nlocal = csize/bs;
200182094794SBarry Smith     }
200282094794SBarry Smith     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
200382094794SBarry Smith     rstart = rend - nlocal;
200465e19b50SBarry Smith     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
200582094794SBarry Smith 
200682094794SBarry Smith     /* next, compute all the lengths */
2007dcca6d9dSJed Brown     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
200882094794SBarry Smith     for (i=0; i<m; i++) {
200982094794SBarry Smith       jend = ii[i+1] - ii[i];
201082094794SBarry Smith       olen = 0;
201182094794SBarry Smith       dlen = 0;
201282094794SBarry Smith       for (j=0; j<jend; j++) {
201382094794SBarry Smith         if (*jj < rstart || *jj >= rend) olen++;
201482094794SBarry Smith         else dlen++;
201582094794SBarry Smith         jj++;
201682094794SBarry Smith       }
201782094794SBarry Smith       olens[i] = olen;
201882094794SBarry Smith       dlens[i] = dlen;
201982094794SBarry Smith     }
202082094794SBarry Smith     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
202182094794SBarry Smith     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
202282094794SBarry Smith     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
202382094794SBarry Smith     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
20248f46ffcaSHong Zhang     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2025eb9baa12SBarry Smith     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
202682094794SBarry Smith   } else {
202782094794SBarry Smith     PetscInt ml,nl;
202882094794SBarry Smith 
202982094794SBarry Smith     M    = *newmat;
203082094794SBarry Smith     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2031e32f2f54SBarry Smith     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
203282094794SBarry Smith     ierr = MatZeroEntries(M);CHKERRQ(ierr);
203382094794SBarry Smith     /*
203482094794SBarry Smith          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
203582094794SBarry Smith        rather than the slower MatSetValues().
203682094794SBarry Smith     */
203782094794SBarry Smith     M->was_assembled = PETSC_TRUE;
203882094794SBarry Smith     M->assembled     = PETSC_FALSE;
203982094794SBarry Smith   }
204082094794SBarry Smith   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
204182094794SBarry Smith   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
204282094794SBarry Smith   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
204382094794SBarry Smith   ii   = aij->i;
204482094794SBarry Smith   jj   = aij->j;
204582094794SBarry Smith   aa   = aij->a;
204682094794SBarry Smith   for (i=0; i<m; i++) {
204782094794SBarry Smith     row   = rstart/bs + i;
204882094794SBarry Smith     nz    = ii[i+1] - ii[i];
204982094794SBarry Smith     cwork = jj;     jj += nz;
205075f6568bSJed Brown     vwork = aa;     aa += nz*bs*bs;
205182094794SBarry Smith     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
205282094794SBarry Smith   }
205382094794SBarry Smith 
205482094794SBarry Smith   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
205582094794SBarry Smith   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
205682094794SBarry Smith   *newmat = M;
205782094794SBarry Smith 
205882094794SBarry Smith   /* save submatrix used in processor for next request */
205982094794SBarry Smith   if (call ==  MAT_INITIAL_MATRIX) {
206082094794SBarry Smith     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
206182094794SBarry Smith     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
206282094794SBarry Smith   }
206382094794SBarry Smith   PetscFunctionReturn(0);
206482094794SBarry Smith }
206582094794SBarry Smith 
206682094794SBarry Smith PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
206782094794SBarry Smith {
206882094794SBarry Smith   MPI_Comm       comm,pcomm;
2069a0a83eb5SRémi Lacroix   PetscInt       clocal_size,nrows;
207082094794SBarry Smith   const PetscInt *rows;
2071dbf0e21dSBarry Smith   PetscMPIInt    size;
2072a0a83eb5SRémi Lacroix   IS             crowp,lcolp;
207382094794SBarry Smith   PetscErrorCode ierr;
207482094794SBarry Smith 
207582094794SBarry Smith   PetscFunctionBegin;
207682094794SBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
207782094794SBarry Smith   /* make a collective version of 'rowp' */
207882094794SBarry Smith   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
207982094794SBarry Smith   if (pcomm==comm) {
208082094794SBarry Smith     crowp = rowp;
208182094794SBarry Smith   } else {
208282094794SBarry Smith     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
208382094794SBarry Smith     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
208470b3c8c7SBarry Smith     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
208582094794SBarry Smith     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
208682094794SBarry Smith   }
2087a0a83eb5SRémi Lacroix   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2088a0a83eb5SRémi Lacroix   /* make a local version of 'colp' */
208982094794SBarry Smith   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2090dbf0e21dSBarry Smith   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2091dbf0e21dSBarry Smith   if (size==1) {
209282094794SBarry Smith     lcolp = colp;
209382094794SBarry Smith   } else {
209475f6568bSJed Brown     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
209582094794SBarry Smith   }
2096dbf0e21dSBarry Smith   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
209775f6568bSJed Brown   /* now we just get the submatrix */
20987afc1a8bSJed Brown   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
20997dae84e0SHong Zhang   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2100a0a83eb5SRémi Lacroix   /* clean up */
2101a0a83eb5SRémi Lacroix   if (pcomm!=comm) {
2102a0a83eb5SRémi Lacroix     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2103a0a83eb5SRémi Lacroix   }
2104dbf0e21dSBarry Smith   if (size>1) {
21056bf464f9SBarry Smith     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
210682094794SBarry Smith   }
210782094794SBarry Smith   PetscFunctionReturn(0);
210882094794SBarry Smith }
210982094794SBarry Smith 
21107087cfbeSBarry Smith PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
21118c7482ecSBarry Smith {
21128c7482ecSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
21138c7482ecSBarry Smith   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
21148c7482ecSBarry Smith 
21158c7482ecSBarry Smith   PetscFunctionBegin;
211626fbe8dcSKarl Rupp   if (nghosts) *nghosts = B->nbs;
211726fbe8dcSKarl Rupp   if (ghosts) *ghosts = baij->garray;
21188c7482ecSBarry Smith   PetscFunctionReturn(0);
21198c7482ecSBarry Smith }
21208c7482ecSBarry Smith 
2121d1adec66SJed Brown PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2122f6d58c54SBarry Smith {
2123f6d58c54SBarry Smith   Mat            B;
2124f6d58c54SBarry Smith   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2125f6d58c54SBarry Smith   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2126f6d58c54SBarry Smith   Mat_SeqAIJ     *b;
2127f6d58c54SBarry Smith   PetscErrorCode ierr;
2128f4259b30SLisandro Dalcin   PetscMPIInt    size,rank,*recvcounts = NULL,*displs = NULL;
2129f6d58c54SBarry Smith   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2130f6d58c54SBarry Smith   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2131f6d58c54SBarry Smith 
2132f6d58c54SBarry Smith   PetscFunctionBegin;
2133ce94432eSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2134ce94432eSBarry Smith   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2135f6d58c54SBarry Smith 
2136f6d58c54SBarry Smith   /* ----------------------------------------------------------------
2137f6d58c54SBarry Smith      Tell every processor the number of nonzeros per row
2138f6d58c54SBarry Smith   */
2139854ce69bSBarry Smith   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2140f6d58c54SBarry Smith   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2141f6d58c54SBarry Smith     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2142f6d58c54SBarry Smith   }
2143785e854fSJed Brown   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2144f6d58c54SBarry Smith   displs    = recvcounts + size;
2145f6d58c54SBarry Smith   for (i=0; i<size; i++) {
2146f6d58c54SBarry Smith     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2147f6d58c54SBarry Smith     displs[i]     = A->rmap->range[i]/bs;
2148f6d58c54SBarry Smith   }
2149f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE)
2150ce94432eSBarry Smith   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2151f6d58c54SBarry Smith #else
21523d3eaba7SBarry Smith   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2153ce94432eSBarry Smith   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2154f6d58c54SBarry Smith #endif
2155f6d58c54SBarry Smith   /* ---------------------------------------------------------------
2156f6d58c54SBarry Smith      Create the sequential matrix of the same type as the local block diagonal
2157f6d58c54SBarry Smith   */
2158f6d58c54SBarry Smith   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2159f6d58c54SBarry Smith   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2160f6d58c54SBarry Smith   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2161f6d58c54SBarry Smith   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2162f6d58c54SBarry Smith   b    = (Mat_SeqAIJ*)B->data;
2163f6d58c54SBarry Smith 
2164f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2165f6d58c54SBarry Smith     Copy my part of matrix column indices over
2166f6d58c54SBarry Smith   */
2167f6d58c54SBarry Smith   sendcount  = ad->nz + bd->nz;
2168f6d58c54SBarry Smith   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2169f6d58c54SBarry Smith   a_jsendbuf = ad->j;
2170f6d58c54SBarry Smith   b_jsendbuf = bd->j;
2171f6d58c54SBarry Smith   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2172f6d58c54SBarry Smith   cnt        = 0;
2173f6d58c54SBarry Smith   for (i=0; i<n; i++) {
2174f6d58c54SBarry Smith 
2175f6d58c54SBarry Smith     /* put in lower diagonal portion */
2176f6d58c54SBarry Smith     m = bd->i[i+1] - bd->i[i];
2177f6d58c54SBarry Smith     while (m > 0) {
2178f6d58c54SBarry Smith       /* is it above diagonal (in bd (compressed) numbering) */
2179f6d58c54SBarry Smith       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2180f6d58c54SBarry Smith       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2181f6d58c54SBarry Smith       m--;
2182f6d58c54SBarry Smith     }
2183f6d58c54SBarry Smith 
2184f6d58c54SBarry Smith     /* put in diagonal portion */
2185f6d58c54SBarry Smith     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2186f6d58c54SBarry Smith       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2187f6d58c54SBarry Smith     }
2188f6d58c54SBarry Smith 
2189f6d58c54SBarry Smith     /* put in upper diagonal portion */
2190f6d58c54SBarry Smith     while (m-- > 0) {
2191f6d58c54SBarry Smith       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2192f6d58c54SBarry Smith     }
2193f6d58c54SBarry Smith   }
2194e32f2f54SBarry Smith   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2195f6d58c54SBarry Smith 
2196f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2197f6d58c54SBarry Smith     Gather all column indices to all processors
2198f6d58c54SBarry Smith   */
2199f6d58c54SBarry Smith   for (i=0; i<size; i++) {
2200f6d58c54SBarry Smith     recvcounts[i] = 0;
2201f6d58c54SBarry Smith     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2202f6d58c54SBarry Smith       recvcounts[i] += lens[j];
2203f6d58c54SBarry Smith     }
2204f6d58c54SBarry Smith   }
2205f6d58c54SBarry Smith   displs[0] = 0;
2206f6d58c54SBarry Smith   for (i=1; i<size; i++) {
2207f6d58c54SBarry Smith     displs[i] = displs[i-1] + recvcounts[i-1];
2208f6d58c54SBarry Smith   }
2209f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE)
2210ce94432eSBarry Smith   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2211f6d58c54SBarry Smith #else
2212ce94432eSBarry Smith   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2213f6d58c54SBarry Smith #endif
2214f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2215f6d58c54SBarry Smith     Assemble the matrix into useable form (note numerical values not yet set)
2216f6d58c54SBarry Smith   */
2217f6d58c54SBarry Smith   /* set the b->ilen (length of each row) values */
2218580bdb30SBarry Smith   ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
2219f6d58c54SBarry Smith   /* set the b->i indices */
2220f6d58c54SBarry Smith   b->i[0] = 0;
2221f6d58c54SBarry Smith   for (i=1; i<=A->rmap->N/bs; i++) {
2222f6d58c54SBarry Smith     b->i[i] = b->i[i-1] + lens[i-1];
2223f6d58c54SBarry Smith   }
2224f6d58c54SBarry Smith   ierr = PetscFree(lens);CHKERRQ(ierr);
2225f6d58c54SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2226f6d58c54SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2227f6d58c54SBarry Smith   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2228f6d58c54SBarry Smith 
2229f6d58c54SBarry Smith   if (A->symmetric) {
2230f6d58c54SBarry Smith     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2231f6d58c54SBarry Smith   } else if (A->hermitian) {
2232f6d58c54SBarry Smith     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2233f6d58c54SBarry Smith   } else if (A->structurally_symmetric) {
2234f6d58c54SBarry Smith     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2235f6d58c54SBarry Smith   }
2236f6d58c54SBarry Smith   *newmat = B;
2237f6d58c54SBarry Smith   PetscFunctionReturn(0);
2238f6d58c54SBarry Smith }
2239f6d58c54SBarry Smith 
2240b1a666ecSBarry Smith PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2241b1a666ecSBarry Smith {
2242b1a666ecSBarry Smith   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2243b1a666ecSBarry Smith   PetscErrorCode ierr;
2244f4259b30SLisandro Dalcin   Vec            bb1 = NULL;
2245b1a666ecSBarry Smith 
2246b1a666ecSBarry Smith   PetscFunctionBegin;
2247b1a666ecSBarry Smith   if (flag == SOR_APPLY_UPPER) {
2248b1a666ecSBarry Smith     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2249b1a666ecSBarry Smith     PetscFunctionReturn(0);
2250b1a666ecSBarry Smith   }
2251b1a666ecSBarry Smith 
22524e980039SJed Brown   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
22534e980039SJed Brown     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
22544e980039SJed Brown   }
22554e980039SJed Brown 
2256b1a666ecSBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2257b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
2258b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2259b1a666ecSBarry Smith       its--;
2260b1a666ecSBarry Smith     }
2261b1a666ecSBarry Smith 
2262b1a666ecSBarry Smith     while (its--) {
2263b1a666ecSBarry Smith       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2264b1a666ecSBarry Smith       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2265b1a666ecSBarry Smith 
2266b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
2267b1a666ecSBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2268b1a666ecSBarry Smith       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2269b1a666ecSBarry Smith 
2270b1a666ecSBarry Smith       /* local sweep */
2271b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2272b1a666ecSBarry Smith     }
2273b1a666ecSBarry Smith   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2274b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
2275b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2276b1a666ecSBarry Smith       its--;
2277b1a666ecSBarry Smith     }
2278b1a666ecSBarry Smith     while (its--) {
2279b1a666ecSBarry Smith       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2280b1a666ecSBarry Smith       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2281b1a666ecSBarry Smith 
2282b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
2283b1a666ecSBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2284b1a666ecSBarry Smith       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2285b1a666ecSBarry Smith 
2286b1a666ecSBarry Smith       /* local sweep */
2287b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2288b1a666ecSBarry Smith     }
2289b1a666ecSBarry Smith   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2290b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
2291b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2292b1a666ecSBarry Smith       its--;
2293b1a666ecSBarry Smith     }
2294b1a666ecSBarry Smith     while (its--) {
2295b1a666ecSBarry Smith       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2296b1a666ecSBarry Smith       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2297b1a666ecSBarry Smith 
2298b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
2299b1a666ecSBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2300b1a666ecSBarry Smith       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2301b1a666ecSBarry Smith 
2302b1a666ecSBarry Smith       /* local sweep */
2303b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2304b1a666ecSBarry Smith     }
2305ce94432eSBarry Smith   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2306b1a666ecSBarry Smith 
23076bf464f9SBarry Smith   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2308b1a666ecSBarry Smith   PetscFunctionReturn(0);
2309b1a666ecSBarry Smith }
2310b1a666ecSBarry Smith 
231147f7623dSRémi Lacroix PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
231247f7623dSRémi Lacroix {
231347f7623dSRémi Lacroix   PetscErrorCode ierr;
231447f7623dSRémi Lacroix   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
231547f7623dSRémi Lacroix   PetscInt       N,i,*garray = aij->garray;
231647f7623dSRémi Lacroix   PetscInt       ib,jb,bs = A->rmap->bs;
231747f7623dSRémi Lacroix   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
231847f7623dSRémi Lacroix   MatScalar      *a_val = a_aij->a;
231947f7623dSRémi Lacroix   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
232047f7623dSRémi Lacroix   MatScalar      *b_val = b_aij->a;
232147f7623dSRémi Lacroix   PetscReal      *work;
232247f7623dSRémi Lacroix 
232347f7623dSRémi Lacroix   PetscFunctionBegin;
232447f7623dSRémi Lacroix   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
23251795a4d1SJed Brown   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
232647f7623dSRémi Lacroix   if (type == NORM_2) {
232747f7623dSRémi Lacroix     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
232847f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
232947f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
233047f7623dSRémi Lacroix           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
233147f7623dSRémi Lacroix           a_val++;
233247f7623dSRémi Lacroix         }
233347f7623dSRémi Lacroix       }
233447f7623dSRémi Lacroix     }
233547f7623dSRémi Lacroix     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
233647f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
233747f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
233847f7623dSRémi Lacroix           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
233947f7623dSRémi Lacroix           b_val++;
234047f7623dSRémi Lacroix         }
234147f7623dSRémi Lacroix       }
234247f7623dSRémi Lacroix     }
234347f7623dSRémi Lacroix   } else if (type == NORM_1) {
234447f7623dSRémi Lacroix     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
234547f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
234647f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
234747f7623dSRémi Lacroix           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
234847f7623dSRémi Lacroix           a_val++;
234947f7623dSRémi Lacroix         }
235047f7623dSRémi Lacroix       }
235147f7623dSRémi Lacroix     }
235247f7623dSRémi Lacroix     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
235347f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
235447f7623dSRémi Lacroix        for (ib=0; ib<bs; ib++) {
235547f7623dSRémi Lacroix           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
235647f7623dSRémi Lacroix           b_val++;
235747f7623dSRémi Lacroix         }
235847f7623dSRémi Lacroix       }
235947f7623dSRémi Lacroix     }
236047f7623dSRémi Lacroix   } else if (type == NORM_INFINITY) {
236147f7623dSRémi Lacroix     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
236247f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
236347f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
236447f7623dSRémi Lacroix           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
236547f7623dSRémi Lacroix           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
236647f7623dSRémi Lacroix           a_val++;
236747f7623dSRémi Lacroix         }
236847f7623dSRémi Lacroix       }
236947f7623dSRémi Lacroix     }
237047f7623dSRémi Lacroix     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
237147f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
237247f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
237347f7623dSRémi Lacroix           int col = garray[b_aij->j[i]] * bs + jb;
237447f7623dSRémi Lacroix           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
237547f7623dSRémi Lacroix           b_val++;
237647f7623dSRémi Lacroix         }
237747f7623dSRémi Lacroix       }
237847f7623dSRémi Lacroix     }
237947f7623dSRémi Lacroix   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
238047f7623dSRémi Lacroix   if (type == NORM_INFINITY) {
2381b2566f29SBarry Smith     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
238247f7623dSRémi Lacroix   } else {
2383b2566f29SBarry Smith     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
238447f7623dSRémi Lacroix   }
238547f7623dSRémi Lacroix   ierr = PetscFree(work);CHKERRQ(ierr);
238647f7623dSRémi Lacroix   if (type == NORM_2) {
238747f7623dSRémi Lacroix     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
238847f7623dSRémi Lacroix   }
238947f7623dSRémi Lacroix   PetscFunctionReturn(0);
239047f7623dSRémi Lacroix }
239147f7623dSRémi Lacroix 
2392713ccfa9SJed Brown PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2393bbead8a2SBarry Smith {
2394bbead8a2SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2395bbead8a2SBarry Smith   PetscErrorCode ierr;
2396bbead8a2SBarry Smith 
2397bbead8a2SBarry Smith   PetscFunctionBegin;
2398bbead8a2SBarry Smith   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
23997b6c816cSBarry Smith   A->factorerrortype             = a->A->factorerrortype;
24007b6c816cSBarry Smith   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
24017b6c816cSBarry Smith   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2402bbead8a2SBarry Smith   PetscFunctionReturn(0);
2403bbead8a2SBarry Smith }
2404bbead8a2SBarry Smith 
24057d68702bSBarry Smith PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
24067d68702bSBarry Smith {
24077d68702bSBarry Smith   PetscErrorCode ierr;
24087d68702bSBarry Smith   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
24096f33a894SBarry Smith   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
24107d68702bSBarry Smith 
24117d68702bSBarry Smith   PetscFunctionBegin;
24126f33a894SBarry Smith   if (!Y->preallocated) {
24137d68702bSBarry Smith     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
24146f33a894SBarry Smith   } else if (!aij->nz) {
2415b83222d8SBarry Smith     PetscInt nonew = aij->nonew;
24166f33a894SBarry Smith     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2417b83222d8SBarry Smith     aij->nonew = nonew;
24187d68702bSBarry Smith   }
24197d68702bSBarry Smith   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
24207d68702bSBarry Smith   PetscFunctionReturn(0);
24217d68702bSBarry Smith }
24228c7482ecSBarry Smith 
24233b49f96aSBarry Smith PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
24243b49f96aSBarry Smith {
24253b49f96aSBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
24263b49f96aSBarry Smith   PetscErrorCode ierr;
24273b49f96aSBarry Smith 
24283b49f96aSBarry Smith   PetscFunctionBegin;
24293b49f96aSBarry Smith   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
24303b49f96aSBarry Smith   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
24313b49f96aSBarry Smith   if (d) {
24323b49f96aSBarry Smith     PetscInt rstart;
24333b49f96aSBarry Smith     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
24343b49f96aSBarry Smith     *d += rstart/A->rmap->bs;
24353b49f96aSBarry Smith 
24363b49f96aSBarry Smith   }
24373b49f96aSBarry Smith   PetscFunctionReturn(0);
24383b49f96aSBarry Smith }
24393b49f96aSBarry Smith 
2440a5b7ff6bSBarry Smith PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2441a5b7ff6bSBarry Smith {
2442a5b7ff6bSBarry Smith   PetscFunctionBegin;
2443a5b7ff6bSBarry Smith   *a = ((Mat_MPIBAIJ*)A->data)->A;
2444a5b7ff6bSBarry Smith   PetscFunctionReturn(0);
2445a5b7ff6bSBarry Smith }
2446a5b7ff6bSBarry Smith 
244779bdfe76SSatish Balay /* -------------------------------------------------------------------*/
24483964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2449cc2dc46cSBarry Smith                                        MatGetRow_MPIBAIJ,
2450cc2dc46cSBarry Smith                                        MatRestoreRow_MPIBAIJ,
2451cc2dc46cSBarry Smith                                        MatMult_MPIBAIJ,
245297304618SKris Buschelman                                 /* 4*/ MatMultAdd_MPIBAIJ,
24537c922b88SBarry Smith                                        MatMultTranspose_MPIBAIJ,
24547c922b88SBarry Smith                                        MatMultTransposeAdd_MPIBAIJ,
2455f4259b30SLisandro Dalcin                                        NULL,
2456f4259b30SLisandro Dalcin                                        NULL,
2457f4259b30SLisandro Dalcin                                        NULL,
2458f4259b30SLisandro Dalcin                                 /*10*/ NULL,
2459f4259b30SLisandro Dalcin                                        NULL,
2460f4259b30SLisandro Dalcin                                        NULL,
2461b1a666ecSBarry Smith                                        MatSOR_MPIBAIJ,
2462cc2dc46cSBarry Smith                                        MatTranspose_MPIBAIJ,
246397304618SKris Buschelman                                 /*15*/ MatGetInfo_MPIBAIJ,
24647fc3c18eSBarry Smith                                        MatEqual_MPIBAIJ,
2465cc2dc46cSBarry Smith                                        MatGetDiagonal_MPIBAIJ,
2466cc2dc46cSBarry Smith                                        MatDiagonalScale_MPIBAIJ,
2467cc2dc46cSBarry Smith                                        MatNorm_MPIBAIJ,
246897304618SKris Buschelman                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2469cc2dc46cSBarry Smith                                        MatAssemblyEnd_MPIBAIJ,
2470cc2dc46cSBarry Smith                                        MatSetOption_MPIBAIJ,
2471cc2dc46cSBarry Smith                                        MatZeroEntries_MPIBAIJ,
2472d519adbfSMatthew Knepley                                 /*24*/ MatZeroRows_MPIBAIJ,
2473f4259b30SLisandro Dalcin                                        NULL,
2474f4259b30SLisandro Dalcin                                        NULL,
2475f4259b30SLisandro Dalcin                                        NULL,
2476f4259b30SLisandro Dalcin                                        NULL,
24774994cf47SJed Brown                                 /*29*/ MatSetUp_MPIBAIJ,
2478f4259b30SLisandro Dalcin                                        NULL,
2479f4259b30SLisandro Dalcin                                        NULL,
2480a5b7ff6bSBarry Smith                                        MatGetDiagonalBlock_MPIBAIJ,
2481f4259b30SLisandro Dalcin                                        NULL,
2482d519adbfSMatthew Knepley                                 /*34*/ MatDuplicate_MPIBAIJ,
2483f4259b30SLisandro Dalcin                                        NULL,
2484f4259b30SLisandro Dalcin                                        NULL,
2485f4259b30SLisandro Dalcin                                        NULL,
2486f4259b30SLisandro Dalcin                                        NULL,
2487d519adbfSMatthew Knepley                                 /*39*/ MatAXPY_MPIBAIJ,
24887dae84e0SHong Zhang                                        MatCreateSubMatrices_MPIBAIJ,
2489cc2dc46cSBarry Smith                                        MatIncreaseOverlap_MPIBAIJ,
2490cc2dc46cSBarry Smith                                        MatGetValues_MPIBAIJ,
24913c896bc6SHong Zhang                                        MatCopy_MPIBAIJ,
2492f4259b30SLisandro Dalcin                                 /*44*/ NULL,
2493cc2dc46cSBarry Smith                                        MatScale_MPIBAIJ,
24947d68702bSBarry Smith                                        MatShift_MPIBAIJ,
2495f4259b30SLisandro Dalcin                                        NULL,
24966f0a72daSMatthew G. Knepley                                        MatZeroRowsColumns_MPIBAIJ,
2497f4259b30SLisandro Dalcin                                 /*49*/ NULL,
2498f4259b30SLisandro Dalcin                                        NULL,
2499f4259b30SLisandro Dalcin                                        NULL,
2500f4259b30SLisandro Dalcin                                        NULL,
2501f4259b30SLisandro Dalcin                                        NULL,
250293dfae19SHong Zhang                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2503f4259b30SLisandro Dalcin                                        NULL,
2504cc2dc46cSBarry Smith                                        MatSetUnfactored_MPIBAIJ,
250582094794SBarry Smith                                        MatPermute_MPIBAIJ,
2506cc2dc46cSBarry Smith                                        MatSetValuesBlocked_MPIBAIJ,
25077dae84e0SHong Zhang                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2508f14a1c24SBarry Smith                                        MatDestroy_MPIBAIJ,
2509f14a1c24SBarry Smith                                        MatView_MPIBAIJ,
2510f4259b30SLisandro Dalcin                                        NULL,
2511f4259b30SLisandro Dalcin                                        NULL,
2512f4259b30SLisandro Dalcin                                 /*64*/ NULL,
2513f4259b30SLisandro Dalcin                                        NULL,
2514f4259b30SLisandro Dalcin                                        NULL,
2515f4259b30SLisandro Dalcin                                        NULL,
2516f4259b30SLisandro Dalcin                                        NULL,
2517d519adbfSMatthew Knepley                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2518f4259b30SLisandro Dalcin                                        NULL,
2519f4259b30SLisandro Dalcin                                        NULL,
2520f4259b30SLisandro Dalcin                                        NULL,
2521f4259b30SLisandro Dalcin                                        NULL,
2522f4259b30SLisandro Dalcin                                 /*74*/ NULL,
2523f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
2524f4259b30SLisandro Dalcin                                        NULL,
2525f4259b30SLisandro Dalcin                                        NULL,
2526f4259b30SLisandro Dalcin                                        NULL,
2527f4259b30SLisandro Dalcin                                 /*79*/ NULL,
2528f4259b30SLisandro Dalcin                                        NULL,
2529f4259b30SLisandro Dalcin                                        NULL,
2530f4259b30SLisandro Dalcin                                        NULL,
25315bba2384SShri Abhyankar                                        MatLoad_MPIBAIJ,
2532f4259b30SLisandro Dalcin                                 /*84*/ NULL,
2533f4259b30SLisandro Dalcin                                        NULL,
2534f4259b30SLisandro Dalcin                                        NULL,
2535f4259b30SLisandro Dalcin                                        NULL,
2536f4259b30SLisandro Dalcin                                        NULL,
2537f4259b30SLisandro Dalcin                                 /*89*/ NULL,
2538f4259b30SLisandro Dalcin                                        NULL,
2539f4259b30SLisandro Dalcin                                        NULL,
2540f4259b30SLisandro Dalcin                                        NULL,
2541f4259b30SLisandro Dalcin                                        NULL,
2542f4259b30SLisandro Dalcin                                 /*94*/ NULL,
2543f4259b30SLisandro Dalcin                                        NULL,
2544f4259b30SLisandro Dalcin                                        NULL,
2545f4259b30SLisandro Dalcin                                        NULL,
2546f4259b30SLisandro Dalcin                                        NULL,
2547f4259b30SLisandro Dalcin                                 /*99*/ NULL,
2548f4259b30SLisandro Dalcin                                        NULL,
2549f4259b30SLisandro Dalcin                                        NULL,
2550f4259b30SLisandro Dalcin                                        NULL,
2551f4259b30SLisandro Dalcin                                        NULL,
2552f4259b30SLisandro Dalcin                                 /*104*/NULL,
255399cafbc1SBarry Smith                                        MatRealPart_MPIBAIJ,
25548c7482ecSBarry Smith                                        MatImaginaryPart_MPIBAIJ,
2555f4259b30SLisandro Dalcin                                        NULL,
2556f4259b30SLisandro Dalcin                                        NULL,
2557f4259b30SLisandro Dalcin                                 /*109*/NULL,
2558f4259b30SLisandro Dalcin                                        NULL,
2559f4259b30SLisandro Dalcin                                        NULL,
2560f4259b30SLisandro Dalcin                                        NULL,
25613b49f96aSBarry Smith                                        MatMissingDiagonal_MPIBAIJ,
2562d1adec66SJed Brown                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2563f4259b30SLisandro Dalcin                                        NULL,
25644683f7a4SShri Abhyankar                                        MatGetGhosts_MPIBAIJ,
2565f4259b30SLisandro Dalcin                                        NULL,
2566f4259b30SLisandro Dalcin                                        NULL,
2567f4259b30SLisandro Dalcin                                 /*119*/NULL,
2568f4259b30SLisandro Dalcin                                        NULL,
2569f4259b30SLisandro Dalcin                                        NULL,
2570f4259b30SLisandro Dalcin                                        NULL,
2571e8271787SHong Zhang                                        MatGetMultiProcBlock_MPIBAIJ,
2572f4259b30SLisandro Dalcin                                 /*124*/NULL,
257347f7623dSRémi Lacroix                                        MatGetColumnNorms_MPIBAIJ,
25743964eb88SJed Brown                                        MatInvertBlockDiagonal_MPIBAIJ,
2575f4259b30SLisandro Dalcin                                        NULL,
2576f4259b30SLisandro Dalcin                                        NULL,
2577f4259b30SLisandro Dalcin                                /*129*/ NULL,
2578f4259b30SLisandro Dalcin                                        NULL,
2579f4259b30SLisandro Dalcin                                        NULL,
2580f4259b30SLisandro Dalcin                                        NULL,
2581f4259b30SLisandro Dalcin                                        NULL,
2582f4259b30SLisandro Dalcin                                /*134*/ NULL,
2583f4259b30SLisandro Dalcin                                        NULL,
2584f4259b30SLisandro Dalcin                                        NULL,
2585f4259b30SLisandro Dalcin                                        NULL,
2586f4259b30SLisandro Dalcin                                        NULL,
258746533700Sstefano_zampini                                /*139*/ MatSetBlockSizes_Default,
2588f4259b30SLisandro Dalcin                                        NULL,
2589f4259b30SLisandro Dalcin                                        NULL,
2590bdf6f3fcSHong Zhang                                        MatFDColoringSetUp_MPIXAIJ,
2591f4259b30SLisandro Dalcin                                        NULL,
2592bdf6f3fcSHong Zhang                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
25938c7482ecSBarry Smith };
259479bdfe76SSatish Balay 
259579bdfe76SSatish Balay 
2596cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2597c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2598d94109b8SHong Zhang 
2599cf12db73SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2600aac34f13SBarry Smith {
2601b8d659d7SLisandro Dalcin   PetscInt       m,rstart,cstart,cend;
2602f4259b30SLisandro Dalcin   PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
2603f4259b30SLisandro Dalcin   const PetscInt *JJ    =NULL;
2604f4259b30SLisandro Dalcin   PetscScalar    *values=NULL;
2605d47bf9aaSJed Brown   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2606aac34f13SBarry Smith   PetscErrorCode ierr;
26073bd0feecSPierre Jolivet   PetscBool      nooffprocentries;
2608aac34f13SBarry Smith 
2609aac34f13SBarry Smith   PetscFunctionBegin;
261026283091SBarry Smith   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
261126283091SBarry Smith   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
261226283091SBarry Smith   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
261326283091SBarry Smith   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2614e02043d6SBarry Smith   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2615d0f46423SBarry Smith   m      = B->rmap->n/bs;
2616d0f46423SBarry Smith   rstart = B->rmap->rstart/bs;
2617d0f46423SBarry Smith   cstart = B->cmap->rstart/bs;
2618d0f46423SBarry Smith   cend   = B->cmap->rend/bs;
2619b8d659d7SLisandro Dalcin 
2620e32f2f54SBarry Smith   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2621dcca6d9dSJed Brown   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2622aac34f13SBarry Smith   for (i=0; i<m; i++) {
2623cf12db73SBarry Smith     nz = ii[i+1] - ii[i];
2624e32f2f54SBarry Smith     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2625b8d659d7SLisandro Dalcin     nz_max = PetscMax(nz_max,nz);
262637cd3c0dSBarry Smith     dlen   = 0;
262737cd3c0dSBarry Smith     olen   = 0;
2628cf12db73SBarry Smith     JJ     = jj + ii[i];
2629b8d659d7SLisandro Dalcin     for (j=0; j<nz; j++) {
263037cd3c0dSBarry Smith       if (*JJ < cstart || *JJ >= cend) olen++;
263137cd3c0dSBarry Smith       else dlen++;
2632aac34f13SBarry Smith       JJ++;
2633aac34f13SBarry Smith     }
263437cd3c0dSBarry Smith     d_nnz[i] = dlen;
263537cd3c0dSBarry Smith     o_nnz[i] = olen;
2636aac34f13SBarry Smith   }
2637aac34f13SBarry Smith   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2638fca92195SBarry Smith   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2639aac34f13SBarry Smith 
2640b8d659d7SLisandro Dalcin   values = (PetscScalar*)V;
2641b8d659d7SLisandro Dalcin   if (!values) {
264237cd3c0dSBarry Smith     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2643b8d659d7SLisandro Dalcin   }
2644b8d659d7SLisandro Dalcin   for (i=0; i<m; i++) {
2645b8d659d7SLisandro Dalcin     PetscInt          row    = i + rstart;
2646cf12db73SBarry Smith     PetscInt          ncols  = ii[i+1] - ii[i];
2647cf12db73SBarry Smith     const PetscInt    *icols = jj + ii[i];
2648bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2649cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2650b8d659d7SLisandro Dalcin       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
26513adadaf3SJed Brown     } else {                    /* block ordering does not match so we can only insert one block at a time. */
26523adadaf3SJed Brown       PetscInt j;
26533adadaf3SJed Brown       for (j=0; j<ncols; j++) {
26543adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
26553adadaf3SJed Brown         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
26563adadaf3SJed Brown       }
26573adadaf3SJed Brown     }
2658aac34f13SBarry Smith   }
2659aac34f13SBarry Smith 
2660b8d659d7SLisandro Dalcin   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
26613bd0feecSPierre Jolivet   nooffprocentries    = B->nooffprocentries;
26623bd0feecSPierre Jolivet   B->nooffprocentries = PETSC_TRUE;
2663aac34f13SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2664aac34f13SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
26653bd0feecSPierre Jolivet   B->nooffprocentries = nooffprocentries;
26663bd0feecSPierre Jolivet 
26677827cd58SJed Brown   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2668aac34f13SBarry Smith   PetscFunctionReturn(0);
2669aac34f13SBarry Smith }
2670aac34f13SBarry Smith 
2671aac34f13SBarry Smith /*@C
2672664954b6SBarry Smith    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2673aac34f13SBarry Smith 
2674d083f849SBarry Smith    Collective
2675aac34f13SBarry Smith 
2676aac34f13SBarry Smith    Input Parameters:
26771c4f3114SJed Brown +  B - the matrix
2678dfb205c3SBarry Smith .  bs - the block size
2679aac34f13SBarry Smith .  i - the indices into j for the start of each local row (starts with zero)
2680aac34f13SBarry Smith .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2681aac34f13SBarry Smith -  v - optional values in the matrix
2682aac34f13SBarry Smith 
2683664954b6SBarry Smith    Level: advanced
2684aac34f13SBarry Smith 
268595452b02SPatrick Sanan    Notes:
268695452b02SPatrick Sanan     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
26873adadaf3SJed Brown    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
26883adadaf3SJed Brown    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
26893adadaf3SJed Brown    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
26903adadaf3SJed Brown    block column and the second index is over columns within a block.
26913adadaf3SJed Brown 
2692664954b6SBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2693664954b6SBarry Smith 
26943adadaf3SJed Brown .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2695aac34f13SBarry Smith @*/
26967087cfbeSBarry Smith PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2697aac34f13SBarry Smith {
26984ac538c5SBarry Smith   PetscErrorCode ierr;
2699aac34f13SBarry Smith 
2700aac34f13SBarry Smith   PetscFunctionBegin;
27016ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
27026ba663aaSJed Brown   PetscValidType(B,1);
27036ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
27044ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2705aac34f13SBarry Smith   PetscFunctionReturn(0);
2706aac34f13SBarry Smith }
2707aac34f13SBarry Smith 
2708b2573a8aSBarry Smith PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2709a23d5eceSKris Buschelman {
2710a23d5eceSKris Buschelman   Mat_MPIBAIJ    *b;
2711dfbe8321SBarry Smith   PetscErrorCode ierr;
2712535b19f3SBarry Smith   PetscInt       i;
27135d2a9ed1SStefano Zampini   PetscMPIInt    size;
2714a23d5eceSKris Buschelman 
2715a23d5eceSKris Buschelman   PetscFunctionBegin;
271633d57670SJed Brown   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
271726283091SBarry Smith   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
271826283091SBarry Smith   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2719e02043d6SBarry Smith   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2720899cda47SBarry Smith 
2721a23d5eceSKris Buschelman   if (d_nnz) {
2722d0f46423SBarry Smith     for (i=0; i<B->rmap->n/bs; i++) {
2723e32f2f54SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2724a23d5eceSKris Buschelman     }
2725a23d5eceSKris Buschelman   }
2726a23d5eceSKris Buschelman   if (o_nnz) {
2727d0f46423SBarry Smith     for (i=0; i<B->rmap->n/bs; i++) {
2728e32f2f54SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2729a23d5eceSKris Buschelman     }
2730a23d5eceSKris Buschelman   }
2731a23d5eceSKris Buschelman 
2732a23d5eceSKris Buschelman   b      = (Mat_MPIBAIJ*)B->data;
2733a23d5eceSKris Buschelman   b->bs2 = bs*bs;
2734d0f46423SBarry Smith   b->mbs = B->rmap->n/bs;
2735d0f46423SBarry Smith   b->nbs = B->cmap->n/bs;
2736d0f46423SBarry Smith   b->Mbs = B->rmap->N/bs;
2737d0f46423SBarry Smith   b->Nbs = B->cmap->N/bs;
2738a23d5eceSKris Buschelman 
2739a23d5eceSKris Buschelman   for (i=0; i<=b->size; i++) {
2740d0f46423SBarry Smith     b->rangebs[i] = B->rmap->range[i]/bs;
2741a23d5eceSKris Buschelman   }
2742d0f46423SBarry Smith   b->rstartbs = B->rmap->rstart/bs;
2743d0f46423SBarry Smith   b->rendbs   = B->rmap->rend/bs;
2744d0f46423SBarry Smith   b->cstartbs = B->cmap->rstart/bs;
2745d0f46423SBarry Smith   b->cendbs   = B->cmap->rend/bs;
2746a23d5eceSKris Buschelman 
2747cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE)
2748cb7b82ddSBarry Smith   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2749cb7b82ddSBarry Smith #else
2750cb7b82ddSBarry Smith   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2751cb7b82ddSBarry Smith #endif
2752cb7b82ddSBarry Smith   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2753cb7b82ddSBarry Smith   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2754cb7b82ddSBarry Smith   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2755cb7b82ddSBarry Smith 
2756cb7b82ddSBarry Smith   /* Because the B will have been resized we simply destroy it and create a new one each time */
27575d2a9ed1SStefano Zampini   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2758cb7b82ddSBarry Smith   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2759cb7b82ddSBarry Smith   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
27605d2a9ed1SStefano Zampini   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2761cb7b82ddSBarry Smith   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2762cb7b82ddSBarry Smith   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2763cb7b82ddSBarry Smith 
2764526dfc15SBarry Smith   if (!B->preallocated) {
2765f69a0ea3SMatthew Knepley     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2766d0f46423SBarry Smith     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
27679c097c71SKris Buschelman     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
27683bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2769ce94432eSBarry Smith     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2770526dfc15SBarry Smith   }
2771a23d5eceSKris Buschelman 
2772526dfc15SBarry Smith   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2773526dfc15SBarry Smith   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2774526dfc15SBarry Smith   B->preallocated  = PETSC_TRUE;
2775cb7b82ddSBarry Smith   B->was_assembled = PETSC_FALSE;
2776cb7b82ddSBarry Smith   B->assembled     = PETSC_FALSE;
2777a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2778a23d5eceSKris Buschelman }
2779a23d5eceSKris Buschelman 
27807087cfbeSBarry Smith extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
27817087cfbeSBarry Smith extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
27825bf65638SKris Buschelman 
2783cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
278482094794SBarry Smith {
278582094794SBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
278682094794SBarry Smith   PetscErrorCode ierr;
278782094794SBarry Smith   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
278882094794SBarry Smith   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
278982094794SBarry Smith   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
279082094794SBarry Smith 
279182094794SBarry Smith   PetscFunctionBegin;
2792854ce69bSBarry Smith   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
279382094794SBarry Smith   ii[0] = 0;
279482094794SBarry Smith   for (i=0; i<M; i++) {
2795e32f2f54SBarry Smith     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2796e32f2f54SBarry Smith     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
279782094794SBarry Smith     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
27985ee9ba1cSJed Brown     /* remove one from count of matrix has diagonal */
27995ee9ba1cSJed Brown     for (j=id[i]; j<id[i+1]; j++) {
28005ee9ba1cSJed Brown       if (jd[j] == i) {ii[i+1]--;break;}
28015ee9ba1cSJed Brown     }
280282094794SBarry Smith   }
2803785e854fSJed Brown   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
280482094794SBarry Smith   cnt  = 0;
280582094794SBarry Smith   for (i=0; i<M; i++) {
280682094794SBarry Smith     for (j=io[i]; j<io[i+1]; j++) {
280782094794SBarry Smith       if (garray[jo[j]] > rstart) break;
280882094794SBarry Smith       jj[cnt++] = garray[jo[j]];
280982094794SBarry Smith     }
281082094794SBarry Smith     for (k=id[i]; k<id[i+1]; k++) {
28115ee9ba1cSJed Brown       if (jd[k] != i) {
281282094794SBarry Smith         jj[cnt++] = rstart + jd[k];
281382094794SBarry Smith       }
28145ee9ba1cSJed Brown     }
281582094794SBarry Smith     for (; j<io[i+1]; j++) {
281682094794SBarry Smith       jj[cnt++] = garray[jo[j]];
281782094794SBarry Smith     }
281882094794SBarry Smith   }
2819ce94432eSBarry Smith   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
282082094794SBarry Smith   PetscFunctionReturn(0);
282182094794SBarry Smith }
282282094794SBarry Smith 
2823c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
282462471d69SBarry Smith 
2825cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2826b2573a8aSBarry Smith 
2827cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
282862471d69SBarry Smith {
282962471d69SBarry Smith   PetscErrorCode ierr;
283062471d69SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
283162471d69SBarry Smith   Mat            B;
283285a69837SSatish Balay   Mat_MPIAIJ     *b;
283362471d69SBarry Smith 
283462471d69SBarry Smith   PetscFunctionBegin;
2835ce94432eSBarry Smith   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
283662471d69SBarry Smith 
28370f6d62edSLisandro Dalcin   if (reuse == MAT_REUSE_MATRIX) {
28380f6d62edSLisandro Dalcin     B = *newmat;
28390f6d62edSLisandro Dalcin   } else {
2840ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
28416d0a4a0eSHong Zhang     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2842f090d951SRémi Lacroix     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2843f090d951SRémi Lacroix     ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
28440298fd71SBarry Smith     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
28450298fd71SBarry Smith     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
28460f6d62edSLisandro Dalcin   }
284762471d69SBarry Smith   b = (Mat_MPIAIJ*) B->data;
284862471d69SBarry Smith 
28490f6d62edSLisandro Dalcin   if (reuse == MAT_REUSE_MATRIX) {
28500f6d62edSLisandro Dalcin     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
28510f6d62edSLisandro Dalcin     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
28520f6d62edSLisandro Dalcin   } else {
28536bf464f9SBarry Smith     ierr = MatDestroy(&b->A);CHKERRQ(ierr);
28546bf464f9SBarry Smith     ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2855ab9863d7SBarry Smith     ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
285662471d69SBarry Smith     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
285762471d69SBarry Smith     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
28586a719282SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
28596a719282SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
28600f6d62edSLisandro Dalcin   }
28610f6d62edSLisandro Dalcin   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
28620f6d62edSLisandro Dalcin   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
28630f6d62edSLisandro Dalcin 
2864511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
286528be2f97SBarry Smith     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
286662471d69SBarry Smith   } else {
286762471d69SBarry Smith    *newmat = B;
286862471d69SBarry Smith   }
286962471d69SBarry Smith   PetscFunctionReturn(0);
287062471d69SBarry Smith }
287162471d69SBarry Smith 
/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
. -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table to save time in constructing the matrix; <fact> is the hash table factor (1.39 is used if it is omitted or not greater than 1.0)

   Level: beginner

   Notes:
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: MatCreateBAIJ()
M*/
28880bad9183SKris Buschelman 
2889cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2890c0cdd4a1SDahai Guo 
28918cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2892273d9f13SBarry Smith {
2893273d9f13SBarry Smith   Mat_MPIBAIJ    *b;
2894dfbe8321SBarry Smith   PetscErrorCode ierr;
289594ae4db5SBarry Smith   PetscBool      flg = PETSC_FALSE;
2896273d9f13SBarry Smith 
2897273d9f13SBarry Smith   PetscFunctionBegin;
2898b00a9115SJed Brown   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
289982502324SSatish Balay   B->data = (void*)b;
290082502324SSatish Balay 
2901273d9f13SBarry Smith   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2902273d9f13SBarry Smith   B->assembled = PETSC_FALSE;
2903273d9f13SBarry Smith 
2904273d9f13SBarry Smith   B->insertmode = NOT_SET_VALUES;
2905ce94432eSBarry Smith   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2906ce94432eSBarry Smith   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2907273d9f13SBarry Smith 
2908273d9f13SBarry Smith   /* build local table of row and column ownerships */
2909854ce69bSBarry Smith   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
2910273d9f13SBarry Smith 
2911273d9f13SBarry Smith   /* build cache for off array entries formed */
2912ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
291326fbe8dcSKarl Rupp 
2914273d9f13SBarry Smith   b->donotstash  = PETSC_FALSE;
29150298fd71SBarry Smith   b->colmap      = NULL;
29160298fd71SBarry Smith   b->garray      = NULL;
2917273d9f13SBarry Smith   b->roworiented = PETSC_TRUE;
2918273d9f13SBarry Smith 
2919273d9f13SBarry Smith   /* stuff used in block assembly */
2920f4259b30SLisandro Dalcin   b->barray = NULL;
2921273d9f13SBarry Smith 
2922273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
2923f4259b30SLisandro Dalcin   b->lvec  = NULL;
2924f4259b30SLisandro Dalcin   b->Mvctx = NULL;
2925273d9f13SBarry Smith 
2926273d9f13SBarry Smith   /* stuff for MatGetRow() */
2927f4259b30SLisandro Dalcin   b->rowindices   = NULL;
2928f4259b30SLisandro Dalcin   b->rowvalues    = NULL;
2929273d9f13SBarry Smith   b->getrowactive = PETSC_FALSE;
2930273d9f13SBarry Smith 
2931273d9f13SBarry Smith   /* hash table stuff */
2932f4259b30SLisandro Dalcin   b->ht           = NULL;
2933f4259b30SLisandro Dalcin   b->hd           = NULL;
2934273d9f13SBarry Smith   b->ht_size      = 0;
2935273d9f13SBarry Smith   b->ht_flag      = PETSC_FALSE;
2936273d9f13SBarry Smith   b->ht_fact      = 0;
2937273d9f13SBarry Smith   b->ht_total_ct  = 0;
2938273d9f13SBarry Smith   b->ht_insert_ct = 0;
2939273d9f13SBarry Smith 
29407dae84e0SHong Zhang   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
29417a868f3eSHong Zhang   b->ijonly = PETSC_FALSE;
29427a868f3eSHong Zhang 
29438c07d4e3SBarry Smith 
2944bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
2945bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
2946bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
29477ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
29487ea3e4caSstefano_zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
29497ea3e4caSstefano_zampini #endif
2950bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2951bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2952bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2953bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2954bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2955bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2956c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
295717667f90SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
295894ae4db5SBarry Smith 
295994ae4db5SBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2960abf3b562SBarry Smith   ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
296194ae4db5SBarry Smith   if (flg) {
296294ae4db5SBarry Smith     PetscReal fact = 1.39;
296394ae4db5SBarry Smith     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
296494ae4db5SBarry Smith     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
296594ae4db5SBarry Smith     if (fact <= 1.0) fact = 1.39;
296694ae4db5SBarry Smith     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
296794ae4db5SBarry Smith     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
296894ae4db5SBarry Smith   }
296994ae4db5SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2970273d9f13SBarry Smith   PetscFunctionReturn(0);
2971273d9f13SBarry Smith }
2972273d9f13SBarry Smith 
2973209238afSKris Buschelman /*MC
2974002d173eSKris Buschelman    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2975209238afSKris Buschelman 
2976209238afSKris Buschelman    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2977209238afSKris Buschelman    and MATMPIBAIJ otherwise.
2978209238afSKris Buschelman 
2979209238afSKris Buschelman    Options Database Keys:
2980209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2981209238afSKris Buschelman 
2982209238afSKris Buschelman   Level: beginner
2983209238afSKris Buschelman 
298469b1f4b7SBarry Smith .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2985209238afSKris Buschelman M*/
2986209238afSKris Buschelman 
2987273d9f13SBarry Smith /*@C
2988aac34f13SBarry Smith    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2989273d9f13SBarry Smith    (block compressed row).  For good matrix assembly performance
2990273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
2991273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2992273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
2993273d9f13SBarry Smith 
2994273d9f13SBarry Smith    Collective on Mat
2995273d9f13SBarry Smith 
2996273d9f13SBarry Smith    Input Parameters:
29971c4f3114SJed Brown +  B - the matrix
2998bb7ae925SBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2999bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3000273d9f13SBarry Smith .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3001273d9f13SBarry Smith            submatrix  (same for all local rows)
3002273d9f13SBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
3003273d9f13SBarry Smith            of the diagonal portion of the local submatrix (possibly different for each block
30040298fd71SBarry Smith            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
300595742e49SBarry Smith            set it even if it is zero.
3006273d9f13SBarry Smith .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3007273d9f13SBarry Smith            submatrix (same for all local rows).
3008273d9f13SBarry Smith -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3009273d9f13SBarry Smith            off-diagonal portion of the local submatrix (possibly different for
30100298fd71SBarry Smith            each block row) or NULL.
3011273d9f13SBarry Smith 
301249a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
3013273d9f13SBarry Smith 
3014273d9f13SBarry Smith    Options Database Keys:
30158c07d4e3SBarry Smith +   -mat_block_size - size of the blocks to use
30168c07d4e3SBarry Smith -   -mat_use_hash_table <fact>
3017273d9f13SBarry Smith 
3018273d9f13SBarry Smith    Notes:
3019273d9f13SBarry Smith    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3020273d9f13SBarry Smith    then it must be used on all processors that share the object for that argument.
3021273d9f13SBarry Smith 
3022273d9f13SBarry Smith    Storage Information:
3023273d9f13SBarry Smith    For a square global matrix we define each processor's diagonal portion
3024273d9f13SBarry Smith    to be its local rows and the corresponding columns (a square submatrix);
3025273d9f13SBarry Smith    each processor's off-diagonal portion encompasses the remainder of the
3026273d9f13SBarry Smith    local matrix (a rectangular submatrix).
3027273d9f13SBarry Smith 
3028273d9f13SBarry Smith    The user can specify preallocated storage for the diagonal part of
3029273d9f13SBarry Smith    the local submatrix with either d_nz or d_nnz (not both).  Set
30300298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3031273d9f13SBarry Smith    memory allocation.  Likewise, specify preallocated storage for the
3032273d9f13SBarry Smith    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3033273d9f13SBarry Smith 
3034273d9f13SBarry Smith    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3035273d9f13SBarry Smith    the figure below we depict these three local rows and all columns (0-11).
3036273d9f13SBarry Smith 
3037273d9f13SBarry Smith .vb
3038273d9f13SBarry Smith            0 1 2 3 4 5 6 7 8 9 10 11
3039a4b1a0f6SJed Brown           --------------------------
3040273d9f13SBarry Smith    row 3  |o o o d d d o o o o  o  o
3041273d9f13SBarry Smith    row 4  |o o o d d d o o o o  o  o
3042273d9f13SBarry Smith    row 5  |o o o d d d o o o o  o  o
3043a4b1a0f6SJed Brown           --------------------------
3044273d9f13SBarry Smith .ve
3045273d9f13SBarry Smith 
3046273d9f13SBarry Smith    Thus, any entries in the d locations are stored in the d (diagonal)
3047273d9f13SBarry Smith    submatrix, and any entries in the o locations are stored in the
3048273d9f13SBarry Smith    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3049273d9f13SBarry Smith    stored simply in the MATSEQBAIJ format for compressed row storage.
3050273d9f13SBarry Smith 
3051273d9f13SBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3052273d9f13SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3053273d9f13SBarry Smith    In general, for PDE problems in which most nonzeros are near the diagonal,
3054273d9f13SBarry Smith    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3055273d9f13SBarry Smith    or you will get TERRIBLE performance; see the users' manual chapter on
3056273d9f13SBarry Smith    matrices.
3057273d9f13SBarry Smith 
3058aa95bbe8SBarry Smith    You can call MatGetInfo() to get information on how effective the preallocation was;
3059aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3060aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
3061aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
3062aa95bbe8SBarry Smith 
3063273d9f13SBarry Smith    Level: intermediate
3064273d9f13SBarry Smith 
3065ab978733SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3066273d9f13SBarry Smith @*/
30677087cfbeSBarry Smith PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3068273d9f13SBarry Smith {
30694ac538c5SBarry Smith   PetscErrorCode ierr;
3070273d9f13SBarry Smith 
3071273d9f13SBarry Smith   PetscFunctionBegin;
30726ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
30736ba663aaSJed Brown   PetscValidType(B,1);
30746ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
30754ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3076273d9f13SBarry Smith   PetscFunctionReturn(0);
3077273d9f13SBarry Smith }
3078273d9f13SBarry Smith 
307979bdfe76SSatish Balay /*@C
308069b1f4b7SBarry Smith    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
308179bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
308279bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
308379bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
308479bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
308579bdfe76SSatish Balay 
3086d083f849SBarry Smith    Collective
3087db81eaa0SLois Curfman McInnes 
308879bdfe76SSatish Balay    Input Parameters:
3089db81eaa0SLois Curfman McInnes +  comm - MPI communicator
3090bb7ae925SBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3091bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
309279bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
309392e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
309492e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
309592e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
309692e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
309792e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
3098be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3099be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
310047a75d0bSBarry Smith .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
310179bdfe76SSatish Balay            submatrix  (same for all local rows)
310247a75d0bSBarry Smith .  d_nnz - array containing the number of nonzero blocks in the various block rows
310392e8d321SLois Curfman McInnes            of the diagonal portion of the local submatrix (possibly different for each block
31040298fd71SBarry Smith            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
310595742e49SBarry Smith            and set it even if it is zero.
310647a75d0bSBarry Smith .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
310779bdfe76SSatish Balay            submatrix (same for all local rows).
310847a75d0bSBarry Smith -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
310992e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
31100298fd71SBarry Smith            each block row) or NULL.
311179bdfe76SSatish Balay 
311279bdfe76SSatish Balay    Output Parameter:
311379bdfe76SSatish Balay .  A - the matrix
311479bdfe76SSatish Balay 
3115db81eaa0SLois Curfman McInnes    Options Database Keys:
31168c07d4e3SBarry Smith +   -mat_block_size - size of the blocks to use
31178c07d4e3SBarry Smith -   -mat_use_hash_table <fact>
31183ffaccefSLois Curfman McInnes 
3119175b88e8SBarry Smith    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3120f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3121175b88e8SBarry Smith    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3122175b88e8SBarry Smith 
3123b259b22eSLois Curfman McInnes    Notes:
312449a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
312549a6f317SBarry Smith 
312647a75d0bSBarry Smith    A nonzero block is any block that has 1 or more nonzeros in it
312747a75d0bSBarry Smith 
312879bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
312979bdfe76SSatish Balay    (possibly both).
313079bdfe76SSatish Balay 
3131be79a94dSBarry Smith    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3132be79a94dSBarry Smith    then it must be used on all processors that share the object for that argument.
3133be79a94dSBarry Smith 
313479bdfe76SSatish Balay    Storage Information:
313579bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
313679bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
313779bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
313879bdfe76SSatish Balay    local matrix (a rectangular submatrix).
313979bdfe76SSatish Balay 
314079bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
314179bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
31420298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
314379bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
314479bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
314579bdfe76SSatish Balay 
314679bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
314779bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
314879bdfe76SSatish Balay 
3149db81eaa0SLois Curfman McInnes .vb
3150db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
3151a4b1a0f6SJed Brown           --------------------------
3152db81eaa0SLois Curfman McInnes    row 3  |o o o d d d o o o o  o  o
3153db81eaa0SLois Curfman McInnes    row 4  |o o o d d d o o o o  o  o
3154db81eaa0SLois Curfman McInnes    row 5  |o o o d d d o o o o  o  o
3155a4b1a0f6SJed Brown           --------------------------
3156db81eaa0SLois Curfman McInnes .ve
315779bdfe76SSatish Balay 
315879bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
315979bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
316079bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
316157b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
316279bdfe76SSatish Balay 
3163d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3164d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
316579bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
316692e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
316792e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
31686da5968aSLois Curfman McInnes    matrices.
316979bdfe76SSatish Balay 
3170027ccd11SLois Curfman McInnes    Level: intermediate
3171027ccd11SLois Curfman McInnes 
317269b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
317379bdfe76SSatish Balay @*/
317469b1f4b7SBarry Smith PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
317579bdfe76SSatish Balay {
31766849ba73SBarry Smith   PetscErrorCode ierr;
3177b24ad042SBarry Smith   PetscMPIInt    size;
317879bdfe76SSatish Balay 
3179d64ed03dSBarry Smith   PetscFunctionBegin;
3180f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3181f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3182d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3183273d9f13SBarry Smith   if (size > 1) {
3184273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3185273d9f13SBarry Smith     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3186273d9f13SBarry Smith   } else {
3187273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3188273d9f13SBarry Smith     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
31893914022bSBarry Smith   }
31903a40ed3dSBarry Smith   PetscFunctionReturn(0);
319179bdfe76SSatish Balay }
3192026e39d0SSatish Balay 
31936849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
31940ac07820SSatish Balay {
31950ac07820SSatish Balay   Mat            mat;
31960ac07820SSatish Balay   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3197dfbe8321SBarry Smith   PetscErrorCode ierr;
3198b24ad042SBarry Smith   PetscInt       len=0;
31990ac07820SSatish Balay 
3200d64ed03dSBarry Smith   PetscFunctionBegin;
3201f4259b30SLisandro Dalcin   *newmat = NULL;
3202ce94432eSBarry Smith   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3203d0f46423SBarry Smith   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
32047adad957SLisandro Dalcin   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
32057fff6886SHong Zhang 
3206d5f3da31SBarry Smith   mat->factortype   = matin->factortype;
3207273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
32080ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
32097fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
32107fff6886SHong Zhang 
3211273d9f13SBarry Smith   a             = (Mat_MPIBAIJ*)mat->data;
3212d0f46423SBarry Smith   mat->rmap->bs = matin->rmap->bs;
32130ac07820SSatish Balay   a->bs2        = oldmat->bs2;
32140ac07820SSatish Balay   a->mbs        = oldmat->mbs;
32150ac07820SSatish Balay   a->nbs        = oldmat->nbs;
32160ac07820SSatish Balay   a->Mbs        = oldmat->Mbs;
32170ac07820SSatish Balay   a->Nbs        = oldmat->Nbs;
32180ac07820SSatish Balay 
32191e1e43feSBarry Smith   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
32201e1e43feSBarry Smith   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3221899cda47SBarry Smith 
32220ac07820SSatish Balay   a->size         = oldmat->size;
32230ac07820SSatish Balay   a->rank         = oldmat->rank;
3224aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
3225aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
3226f4259b30SLisandro Dalcin   a->rowindices   = NULL;
3227f4259b30SLisandro Dalcin   a->rowvalues    = NULL;
32280ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
3229f4259b30SLisandro Dalcin   a->barray       = NULL;
3230899cda47SBarry Smith   a->rstartbs     = oldmat->rstartbs;
3231899cda47SBarry Smith   a->rendbs       = oldmat->rendbs;
3232899cda47SBarry Smith   a->cstartbs     = oldmat->cstartbs;
3233899cda47SBarry Smith   a->cendbs       = oldmat->cendbs;
32340ac07820SSatish Balay 
3235133cdb44SSatish Balay   /* hash table stuff */
3236f4259b30SLisandro Dalcin   a->ht           = NULL;
3237f4259b30SLisandro Dalcin   a->hd           = NULL;
3238133cdb44SSatish Balay   a->ht_size      = 0;
3239133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
324025fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
3241133cdb44SSatish Balay   a->ht_total_ct  = 0;
3242133cdb44SSatish Balay   a->ht_insert_ct = 0;
3243133cdb44SSatish Balay 
3244580bdb30SBarry Smith   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
32450ac07820SSatish Balay   if (oldmat->colmap) {
3246aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
32470f5bd95cSBarry Smith     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
324848e59246SSatish Balay #else
3249854ce69bSBarry Smith     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
32503bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3251580bdb30SBarry Smith     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
325248e59246SSatish Balay #endif
3253f4259b30SLisandro Dalcin   } else a->colmap = NULL;
32544beb1cfeSHong Zhang 
32550ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3256785e854fSJed Brown     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
32573bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3258580bdb30SBarry Smith     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
3259f4259b30SLisandro Dalcin   } else a->garray = NULL;
32600ac07820SSatish Balay 
3261ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
32620ac07820SSatish Balay   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
32633bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
32640ac07820SSatish Balay   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
32653bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
32667fff6886SHong Zhang 
32672e8a6d31SBarry Smith   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
32683bb1ff40SBarry Smith   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
32692e8a6d31SBarry Smith   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
32703bb1ff40SBarry Smith   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3271140e18c1SBarry Smith   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
32720ac07820SSatish Balay   *newmat = mat;
32733a40ed3dSBarry Smith   PetscFunctionReturn(0);
32740ac07820SSatish Balay }
327557b952d6SSatish Balay 
3276618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
3277b51a4376SLisandro Dalcin PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
3278b51a4376SLisandro Dalcin {
3279b51a4376SLisandro Dalcin   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3280b51a4376SLisandro Dalcin   PetscInt       *rowidxs,*colidxs,rs,cs,ce;
3281b51a4376SLisandro Dalcin   PetscScalar    *matvals;
3282b51a4376SLisandro Dalcin   PetscErrorCode ierr;
3283b51a4376SLisandro Dalcin 
3284b51a4376SLisandro Dalcin   PetscFunctionBegin;
3285b51a4376SLisandro Dalcin   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3286b51a4376SLisandro Dalcin 
3287b51a4376SLisandro Dalcin   /* read in matrix header */
3288b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3289b51a4376SLisandro Dalcin   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3290b51a4376SLisandro Dalcin   M  = header[1]; N = header[2]; nz = header[3];
3291b51a4376SLisandro Dalcin   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3292b51a4376SLisandro Dalcin   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3293b51a4376SLisandro Dalcin   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
3294b51a4376SLisandro Dalcin 
3295b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
3296b51a4376SLisandro Dalcin   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3297618cc2edSLisandro Dalcin   /* set local sizes if not set already */
3298618cc2edSLisandro Dalcin   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3299618cc2edSLisandro Dalcin   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3300b51a4376SLisandro Dalcin   /* set global sizes if not set already */
3301b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3302b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
3303b51a4376SLisandro Dalcin   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3304b51a4376SLisandro Dalcin   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3305b51a4376SLisandro Dalcin 
3306b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
3307b51a4376SLisandro Dalcin   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3308b51a4376SLisandro Dalcin   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3309b51a4376SLisandro Dalcin   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3310b51a4376SLisandro Dalcin   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3311b51a4376SLisandro Dalcin   ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);
3312b51a4376SLisandro Dalcin   ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);
3313b51a4376SLisandro Dalcin   mbs = m/bs; nbs = n/bs;
3314b51a4376SLisandro Dalcin 
3315b51a4376SLisandro Dalcin   /* read in row lengths and build row indices */
3316b51a4376SLisandro Dalcin   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3317b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3318b51a4376SLisandro Dalcin   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3319b51a4376SLisandro Dalcin   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3320b51a4376SLisandro Dalcin   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3321b51a4376SLisandro Dalcin 
3322b51a4376SLisandro Dalcin   /* read in column indices and matrix values */
3323b51a4376SLisandro Dalcin   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3324b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3325b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3326b51a4376SLisandro Dalcin 
3327b51a4376SLisandro Dalcin   { /* preallocate matrix storage */
3328b51a4376SLisandro Dalcin     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3329b51a4376SLisandro Dalcin     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3330618cc2edSLisandro Dalcin     PetscBool  sbaij,done;
3331b51a4376SLisandro Dalcin     PetscInt   *d_nnz,*o_nnz;
3332b51a4376SLisandro Dalcin 
3333b51a4376SLisandro Dalcin     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3334b51a4376SLisandro Dalcin     ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
3335b51a4376SLisandro Dalcin     ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
3336618cc2edSLisandro Dalcin     ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
3337b51a4376SLisandro Dalcin     for (i=0; i<mbs; i++) {
3338b51a4376SLisandro Dalcin       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3339b51a4376SLisandro Dalcin       ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
3340618cc2edSLisandro Dalcin       for (k=0; k<bs; k++) {
3341618cc2edSLisandro Dalcin         PetscInt row = bs*i + k;
3342618cc2edSLisandro Dalcin         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3343618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3344618cc2edSLisandro Dalcin           if (!sbaij || col >= row) {
3345618cc2edSLisandro Dalcin             if (col >= cs && col < ce) {
3346618cc2edSLisandro Dalcin               if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
3347b51a4376SLisandro Dalcin             } else {
3348618cc2edSLisandro Dalcin               ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
3349b51a4376SLisandro Dalcin               if (done) o_nnz[i]++;
3350b51a4376SLisandro Dalcin             }
3351b51a4376SLisandro Dalcin           }
3352618cc2edSLisandro Dalcin         }
3353618cc2edSLisandro Dalcin       }
3354618cc2edSLisandro Dalcin     }
3355b51a4376SLisandro Dalcin     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3356b51a4376SLisandro Dalcin     ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
3357b51a4376SLisandro Dalcin     ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3358618cc2edSLisandro Dalcin     ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3359b51a4376SLisandro Dalcin     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3360b51a4376SLisandro Dalcin   }
3361b51a4376SLisandro Dalcin 
3362b51a4376SLisandro Dalcin   /* store matrix values */
3363b51a4376SLisandro Dalcin   for (i=0; i<m; i++) {
3364b51a4376SLisandro Dalcin     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
3365618cc2edSLisandro Dalcin     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3366b51a4376SLisandro Dalcin   }
3367b51a4376SLisandro Dalcin 
3368b51a4376SLisandro Dalcin   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3369b51a4376SLisandro Dalcin   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3370b51a4376SLisandro Dalcin   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3371b51a4376SLisandro Dalcin   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3372b51a4376SLisandro Dalcin   PetscFunctionReturn(0);
3373b51a4376SLisandro Dalcin }
3374b51a4376SLisandro Dalcin 
3375b51a4376SLisandro Dalcin PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
33764683f7a4SShri Abhyankar {
33774683f7a4SShri Abhyankar   PetscErrorCode ierr;
33787f489da9SVaclav Hapla   PetscBool      isbinary;
33794683f7a4SShri Abhyankar 
33804683f7a4SShri Abhyankar   PetscFunctionBegin;
33817f489da9SVaclav Hapla   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3382b51a4376SLisandro Dalcin   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3383b51a4376SLisandro Dalcin   ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
33844683f7a4SShri Abhyankar   PetscFunctionReturn(0);
33854683f7a4SShri Abhyankar }
33864683f7a4SShri Abhyankar 
3387133cdb44SSatish Balay /*@
3388133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3389133cdb44SSatish Balay 
3390133cdb44SSatish Balay    Input Parameters:
3391a2b725a8SWilliam Gropp +  mat  - the matrix
3392a2b725a8SWilliam Gropp -  fact - factor
3393133cdb44SSatish Balay 
3394c5eb9154SBarry Smith    Not Collective, each process can use a different factor
3395fee21e36SBarry Smith 
33968c890885SBarry Smith    Level: advanced
33978c890885SBarry Smith 
3398133cdb44SSatish Balay   Notes:
33998c07d4e3SBarry Smith    This can also be set by the command line option: -mat_use_hash_table <fact>
3400133cdb44SSatish Balay 
3401133cdb44SSatish Balay .seealso: MatSetOption()
3402133cdb44SSatish Balay @*/
34037087cfbeSBarry Smith PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3404133cdb44SSatish Balay {
34054ac538c5SBarry Smith   PetscErrorCode ierr;
34065bf65638SKris Buschelman 
34075bf65638SKris Buschelman   PetscFunctionBegin;
34084ac538c5SBarry Smith   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
34095bf65638SKris Buschelman   PetscFunctionReturn(0);
34105bf65638SKris Buschelman }
34115bf65638SKris Buschelman 
34127087cfbeSBarry Smith PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
34135bf65638SKris Buschelman {
341425fdafccSSatish Balay   Mat_MPIBAIJ *baij;
3415133cdb44SSatish Balay 
3416133cdb44SSatish Balay   PetscFunctionBegin;
3417133cdb44SSatish Balay   baij          = (Mat_MPIBAIJ*)mat->data;
3418133cdb44SSatish Balay   baij->ht_fact = fact;
3419133cdb44SSatish Balay   PetscFunctionReturn(0);
3420133cdb44SSatish Balay }
3421f2a5309cSSatish Balay 
34229230625dSJed Brown PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3423f2a5309cSSatish Balay {
3424f2a5309cSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3425ab4d48faSStefano Zampini   PetscBool      flg;
3426ab4d48faSStefano Zampini   PetscErrorCode ierr;
34275fd66863SKarl Rupp 
3428f2a5309cSSatish Balay   PetscFunctionBegin;
3429ab4d48faSStefano Zampini   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3430ab4d48faSStefano Zampini   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
343121e72a00SBarry Smith   if (Ad)     *Ad     = a->A;
343221e72a00SBarry Smith   if (Ao)     *Ao     = a->B;
343321e72a00SBarry Smith   if (colmap) *colmap = a->garray;
3434f2a5309cSSatish Balay   PetscFunctionReturn(0);
3435f2a5309cSSatish Balay }
343685535b8eSBarry Smith 
343785535b8eSBarry Smith /*
343885535b8eSBarry Smith     Special version for direct calls from Fortran (to eliminate two function call overheads)
343985535b8eSBarry Smith */
344085535b8eSBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
344185535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
344285535b8eSBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
344385535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
344485535b8eSBarry Smith #endif
344585535b8eSBarry Smith 
344685535b8eSBarry Smith /*@C
344785535b8eSBarry Smith   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
344885535b8eSBarry Smith 
344985535b8eSBarry Smith   Collective on Mat
345085535b8eSBarry Smith 
345185535b8eSBarry Smith   Input Parameters:
345285535b8eSBarry Smith + mat - the matrix
345385535b8eSBarry Smith . min - number of input rows
345485535b8eSBarry Smith . im - input rows
345585535b8eSBarry Smith . nin - number of input columns
345685535b8eSBarry Smith . in - input columns
345785535b8eSBarry Smith . v - numerical values input
345885535b8eSBarry Smith - addvin - INSERT_VALUES or ADD_VALUES
345985535b8eSBarry Smith 
346095452b02SPatrick Sanan   Notes:
346195452b02SPatrick Sanan     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
346285535b8eSBarry Smith 
346385535b8eSBarry Smith   Level: advanced
346485535b8eSBarry Smith 
346585535b8eSBarry Smith .seealso:   MatSetValuesBlocked()
346685535b8eSBarry Smith @*/
346785535b8eSBarry Smith PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
346885535b8eSBarry Smith {
346985535b8eSBarry Smith   /* convert input arguments to C version */
347085535b8eSBarry Smith   Mat        mat  = *matin;
347185535b8eSBarry Smith   PetscInt   m    = *min, n = *nin;
347285535b8eSBarry Smith   InsertMode addv = *addvin;
347385535b8eSBarry Smith 
347485535b8eSBarry Smith   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
347585535b8eSBarry Smith   const MatScalar *value;
347685535b8eSBarry Smith   MatScalar       *barray     = baij->barray;
3477ace3abfcSBarry Smith   PetscBool       roworiented = baij->roworiented;
347885535b8eSBarry Smith   PetscErrorCode  ierr;
347985535b8eSBarry Smith   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
348085535b8eSBarry Smith   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3481d0f46423SBarry Smith   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
348285535b8eSBarry Smith 
348385535b8eSBarry Smith   PetscFunctionBegin;
348485535b8eSBarry Smith   /* tasks normally handled by MatSetValuesBlocked() */
348526fbe8dcSKarl Rupp   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
348676bd3646SJed Brown   else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
348776bd3646SJed Brown   if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
348885535b8eSBarry Smith   if (mat->assembled) {
348985535b8eSBarry Smith     mat->was_assembled = PETSC_TRUE;
349085535b8eSBarry Smith     mat->assembled     = PETSC_FALSE;
349185535b8eSBarry Smith   }
349285535b8eSBarry Smith   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
349385535b8eSBarry Smith 
349485535b8eSBarry Smith 
349585535b8eSBarry Smith   if (!barray) {
3496785e854fSJed Brown     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
349785535b8eSBarry Smith     baij->barray = barray;
349885535b8eSBarry Smith   }
349985535b8eSBarry Smith 
350026fbe8dcSKarl Rupp   if (roworiented) stepval = (n-1)*bs;
350126fbe8dcSKarl Rupp   else stepval = (m-1)*bs;
350226fbe8dcSKarl Rupp 
350385535b8eSBarry Smith   for (i=0; i<m; i++) {
350485535b8eSBarry Smith     if (im[i] < 0) continue;
3505cf9c20a2SJed Brown     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
350685535b8eSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
350785535b8eSBarry Smith       row = im[i] - rstart;
350885535b8eSBarry Smith       for (j=0; j<n; j++) {
350985535b8eSBarry Smith         /* If NumCol = 1 then a copy is not required */
351085535b8eSBarry Smith         if ((roworiented) && (n == 1)) {
351185535b8eSBarry Smith           barray = (MatScalar*)v + i*bs2;
351285535b8eSBarry Smith         } else if ((!roworiented) && (m == 1)) {
351385535b8eSBarry Smith           barray = (MatScalar*)v + j*bs2;
351485535b8eSBarry Smith         } else { /* Here a copy is required */
351585535b8eSBarry Smith           if (roworiented) {
351685535b8eSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
351785535b8eSBarry Smith           } else {
351885535b8eSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
351985535b8eSBarry Smith           }
352085535b8eSBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
352185535b8eSBarry Smith             for (jj=0; jj<bs; jj++) {
352285535b8eSBarry Smith               *barray++ = *value++;
352385535b8eSBarry Smith             }
352485535b8eSBarry Smith           }
352585535b8eSBarry Smith           barray -=bs2;
352685535b8eSBarry Smith         }
352785535b8eSBarry Smith 
352885535b8eSBarry Smith         if (in[j] >= cstart && in[j] < cend) {
352985535b8eSBarry Smith           col  = in[j] - cstart;
35308ab52850SBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
353126fbe8dcSKarl Rupp         } else if (in[j] < 0) continue;
3532cf9c20a2SJed Brown         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
353385535b8eSBarry Smith         else {
353485535b8eSBarry Smith           if (mat->was_assembled) {
353585535b8eSBarry Smith             if (!baij->colmap) {
3536ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
353785535b8eSBarry Smith             }
353885535b8eSBarry Smith 
353985535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
354085535b8eSBarry Smith #if defined(PETSC_USE_CTABLE)
354185535b8eSBarry Smith             { PetscInt data;
354285535b8eSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3543e32f2f54SBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
354485535b8eSBarry Smith             }
354585535b8eSBarry Smith #else
3546e32f2f54SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
354785535b8eSBarry Smith #endif
354885535b8eSBarry Smith #endif
354985535b8eSBarry Smith #if defined(PETSC_USE_CTABLE)
355085535b8eSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
355185535b8eSBarry Smith             col  = (col - 1)/bs;
355285535b8eSBarry Smith #else
355385535b8eSBarry Smith             col = (baij->colmap[in[j]] - 1)/bs;
355485535b8eSBarry Smith #endif
355585535b8eSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3556ab9863d7SBarry Smith               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
355785535b8eSBarry Smith               col  =  in[j];
355885535b8eSBarry Smith             }
355926fbe8dcSKarl Rupp           } else col = in[j];
35608ab52850SBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
356185535b8eSBarry Smith         }
356285535b8eSBarry Smith       }
356385535b8eSBarry Smith     } else {
356485535b8eSBarry Smith       if (!baij->donotstash) {
356585535b8eSBarry Smith         if (roworiented) {
356685535b8eSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
356785535b8eSBarry Smith         } else {
356885535b8eSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
356985535b8eSBarry Smith         }
357085535b8eSBarry Smith       }
357185535b8eSBarry Smith     }
357285535b8eSBarry Smith   }
357385535b8eSBarry Smith 
357485535b8eSBarry Smith   /* task normally handled by MatSetValuesBlocked() */
357585535b8eSBarry Smith   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
357685535b8eSBarry Smith   PetscFunctionReturn(0);
357785535b8eSBarry Smith }
3578dfb205c3SBarry Smith 
3579dfb205c3SBarry Smith /*@
3580483a2f95SBarry Smith      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3581dfb205c3SBarry Smith          CSR format the local rows.
3582dfb205c3SBarry Smith 
3583d083f849SBarry Smith    Collective
3584dfb205c3SBarry Smith 
3585dfb205c3SBarry Smith    Input Parameters:
3586dfb205c3SBarry Smith +  comm - MPI communicator
3587dfb205c3SBarry Smith .  bs - the block size, only a block size of 1 is supported
3588dfb205c3SBarry Smith .  m - number of local rows (Cannot be PETSC_DECIDE)
3589dfb205c3SBarry Smith .  n - This value should be the same as the local size used in creating the
3590dfb205c3SBarry Smith        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3591dfb205c3SBarry Smith        calculated if N is given) For square matrices n is almost always m.
3592dfb205c3SBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3593dfb205c3SBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3594483a2f95SBarry Smith .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3595dfb205c3SBarry Smith .   j - column indices
3596dfb205c3SBarry Smith -   a - matrix values
3597dfb205c3SBarry Smith 
3598dfb205c3SBarry Smith    Output Parameter:
3599dfb205c3SBarry Smith .   mat - the matrix
3600dfb205c3SBarry Smith 
3601dfb205c3SBarry Smith    Level: intermediate
3602dfb205c3SBarry Smith 
3603dfb205c3SBarry Smith    Notes:
3604dfb205c3SBarry Smith        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3605dfb205c3SBarry Smith      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3606dfb205c3SBarry Smith      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3607dfb205c3SBarry Smith 
36083adadaf3SJed Brown      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
36093adadaf3SJed Brown      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
36103adadaf3SJed Brown      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
36113adadaf3SJed Brown      with column-major ordering within blocks.
36123adadaf3SJed Brown 
3613dfb205c3SBarry Smith        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3614dfb205c3SBarry Smith 
3615dfb205c3SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
361669b1f4b7SBarry Smith           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3617dfb205c3SBarry Smith @*/
36187087cfbeSBarry Smith PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3619dfb205c3SBarry Smith {
3620dfb205c3SBarry Smith   PetscErrorCode ierr;
3621dfb205c3SBarry Smith 
3622dfb205c3SBarry Smith   PetscFunctionBegin;
3623f23aa3ddSBarry Smith   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3624dfb205c3SBarry Smith   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3625dfb205c3SBarry Smith   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3626dfb205c3SBarry Smith   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
36279a43d2d5SJed Brown   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
362827f91139SJed Brown   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
362927f91139SJed Brown   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3630d47bf9aaSJed Brown   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3631dfb205c3SBarry Smith   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3632d47bf9aaSJed Brown   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3633dfb205c3SBarry Smith   PetscFunctionReturn(0);
3634dfb205c3SBarry Smith }
3635e561ad89SHong Zhang 
3636bd153df0SHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3637e561ad89SHong Zhang {
3638e561ad89SHong Zhang   PetscErrorCode ierr;
3639bd153df0SHong Zhang   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3640bd153df0SHong Zhang   PetscInt       *indx;
3641bd153df0SHong Zhang   PetscScalar    *values;
3642e561ad89SHong Zhang 
3643e561ad89SHong Zhang   PetscFunctionBegin;
3644e561ad89SHong Zhang   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3645bd153df0SHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3646bd153df0SHong Zhang     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
36472c6ba4edSHong Zhang     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3648bd153df0SHong Zhang     PetscInt       *bindx,rmax=a->rmax,j;
364977f764caSHong Zhang     PetscMPIInt    rank,size;
3650e561ad89SHong Zhang 
3651bd153df0SHong Zhang     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3652bd153df0SHong Zhang     mbs = m/bs; Nbs = N/cbs;
3653bd153df0SHong Zhang     if (n == PETSC_DECIDE) {
3654da91a574SPierre Jolivet       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3655bd153df0SHong Zhang     }
3656da91a574SPierre Jolivet     nbs = n/cbs;
3657e561ad89SHong Zhang 
3658647a6520SHong Zhang     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
365977f764caSHong Zhang     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
366077f764caSHong Zhang 
366177f764caSHong Zhang     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
366277f764caSHong Zhang     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
366377f764caSHong Zhang     if (rank == size-1) {
366477f764caSHong Zhang       /* Check sum(nbs) = Nbs */
36652c6ba4edSHong Zhang       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
366677f764caSHong Zhang     }
366777f764caSHong Zhang 
366877f764caSHong Zhang     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3669bd153df0SHong Zhang     for (i=0; i<mbs; i++) {
3670647a6520SHong Zhang       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3671647a6520SHong Zhang       nnz = nnz/bs;
3672647a6520SHong Zhang       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3673647a6520SHong Zhang       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3674647a6520SHong Zhang       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3675e561ad89SHong Zhang     }
3676647a6520SHong Zhang     ierr = PetscFree(bindx);CHKERRQ(ierr);
3677e561ad89SHong Zhang 
3678e561ad89SHong Zhang     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
367977f764caSHong Zhang     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3680e561ad89SHong Zhang     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
36818761c3d6SHong Zhang     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
36828761c3d6SHong Zhang     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3683e561ad89SHong Zhang     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3684e561ad89SHong Zhang     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3685e561ad89SHong Zhang   }
3686e561ad89SHong Zhang 
3687bd153df0SHong Zhang   /* numeric phase */
3688647a6520SHong Zhang   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3689bd153df0SHong Zhang   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3690e561ad89SHong Zhang 
3691e561ad89SHong Zhang   for (i=0; i<m; i++) {
3692e561ad89SHong Zhang     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3693e561ad89SHong Zhang     Ii   = i + rstart;
3694bd153df0SHong Zhang     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3695e561ad89SHong Zhang     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3696e561ad89SHong Zhang   }
3697bd153df0SHong Zhang   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3698bd153df0SHong Zhang   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3699e561ad89SHong Zhang   PetscFunctionReturn(0);
3700e561ad89SHong Zhang }
3701