xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision a873a8cd69acc6fd9b12ad3d6b30ee1bf0a81da9)
1c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2c5d9258eSSatish Balay 
3b51a4376SLisandro Dalcin #include <petsc/private/hashseti.h>
4c6db04a5SJed Brown #include <petscblaslapack.h>
565a92638SMatthew G. Knepley #include <petscsf.h>
679bdfe76SSatish Balay 
77ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
87ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
97ea3e4caSstefano_zampini #endif
107ea3e4caSstefano_zampini 
11985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
127843d17aSBarry Smith {
137843d17aSBarry Smith   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
14dfbe8321SBarry Smith   PetscErrorCode    ierr;
154e879edeSHong Zhang   PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
164e879edeSHong Zhang   PetscScalar       *va,*vv;
174e879edeSHong Zhang   Vec               vB,vA;
184e879edeSHong Zhang   const PetscScalar *vb;
197843d17aSBarry Smith 
207843d17aSBarry Smith   PetscFunctionBegin;
214e879edeSHong Zhang   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
224e879edeSHong Zhang   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
234e879edeSHong Zhang 
244e879edeSHong Zhang   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
25985db425SBarry Smith   if (idx) {
264e879edeSHong Zhang     for (i=0; i<m; i++) {
2726fbe8dcSKarl Rupp       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2826fbe8dcSKarl Rupp     }
29985db425SBarry Smith   }
307843d17aSBarry Smith 
314e879edeSHong Zhang   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
3243359b5eSHong Zhang   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
334e879edeSHong Zhang   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
347843d17aSBarry Smith 
354e879edeSHong Zhang   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
364e879edeSHong Zhang   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
374e879edeSHong Zhang   for (i=0; i<m; i++) {
3826fbe8dcSKarl Rupp     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
394e879edeSHong Zhang       vv[i] = vb[i];
404e879edeSHong Zhang       if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
414e879edeSHong Zhang     } else {
424e879edeSHong Zhang       vv[i] = va[i];
4343359b5eSHong Zhang       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
444e879edeSHong Zhang         idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
4526fbe8dcSKarl Rupp     }
467843d17aSBarry Smith   }
474e879edeSHong Zhang   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
484e879edeSHong Zhang   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
494e879edeSHong Zhang   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
50c31cb41cSBarry Smith   ierr = PetscFree(idxb);CHKERRQ(ierr);
514e879edeSHong Zhang   ierr = VecDestroy(&vA);CHKERRQ(ierr);
524e879edeSHong Zhang   ierr = VecDestroy(&vB);CHKERRQ(ierr);
537843d17aSBarry Smith   PetscFunctionReturn(0);
547843d17aSBarry Smith }
557843d17aSBarry Smith 
567087cfbeSBarry Smith PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
577fc3c18eSBarry Smith {
587fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
59dfbe8321SBarry Smith   PetscErrorCode ierr;
607fc3c18eSBarry Smith 
617fc3c18eSBarry Smith   PetscFunctionBegin;
627fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
637fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
647fc3c18eSBarry Smith   PetscFunctionReturn(0);
657fc3c18eSBarry Smith }
667fc3c18eSBarry Smith 
677087cfbeSBarry Smith PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
687fc3c18eSBarry Smith {
697fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
70dfbe8321SBarry Smith   PetscErrorCode ierr;
717fc3c18eSBarry Smith 
727fc3c18eSBarry Smith   PetscFunctionBegin;
737fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
747fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
757fc3c18eSBarry Smith   PetscFunctionReturn(0);
767fc3c18eSBarry Smith }
777fc3c18eSBarry Smith 
78537820f0SBarry Smith /*
79537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
8057b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
81e06f6af7SJed Brown    storage of the matrix.  This is done in a non scalable way since the
8257b952d6SSatish Balay    length of colmap equals the global matrix length.
8357b952d6SSatish Balay */
84ab9863d7SBarry Smith PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
8557b952d6SSatish Balay {
8657b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
8757b952d6SSatish Balay   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
886849ba73SBarry Smith   PetscErrorCode ierr;
89d0f46423SBarry Smith   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
9057b952d6SSatish Balay 
91d64ed03dSBarry Smith   PetscFunctionBegin;
92aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
93e23dfa41SBarry Smith   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
9448e59246SSatish Balay   for (i=0; i<nbs; i++) {
953861aac3SJed Brown     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
9648e59246SSatish Balay   }
9748e59246SSatish Balay #else
98580bdb30SBarry Smith   ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
993bb1ff40SBarry Smith   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
100928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
10148e59246SSatish Balay #endif
1023a40ed3dSBarry Smith   PetscFunctionReturn(0);
10357b952d6SSatish Balay }
10457b952d6SSatish Balay 
/*
   MatSetValues_SeqBAIJ_A_Private - Sets or adds one scalar at point position (row,col) of the
   sequential block A; (orow,ocol) are only used in the error message.

   The macro is not self-contained.  It reads and writes these names from the expanding scope:
     A, a, aa, ai, aj, aimax, ailen, bs, bs2                       (matrix state)
     brow, bcol, ridx, cidx, rp, ap, bap, rmax, nrow, low, high, t, _i, N, ierr   (scratch)
   MatSeqXAIJReallocateAIJ() may rely on further caller-scope names - check its definition
   before removing any local from a function that expands this macro.

   Steps: narrow the block row with a bisection until at most 3 candidates remain, scan them
   linearly, update in place if the block column exists, otherwise (subject to a->nonew) grow
   the row if needed, shift the tail up by one block and store the value in a zeroed block.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = (row)/bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = (col)/bs; \
    ridx = (row) % bs; \
    cidx = (col) % bs; \
    low  = 0; \
    high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block already present: blocks are stored column-major */ \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
14057b952d6SSatish Balay 
/*
   MatSetValues_SeqBAIJ_B_Private - Sets or adds one scalar at point position (row,col) of the
   sequential block B; (orow,ocol) are only used in the error message.

   The macro is not self-contained.  It reads and writes these names from the expanding scope:
     B, b, ba, bi, bj, bimax, bilen, bs, bs2                       (matrix state)
     brow, bcol, ridx, cidx, rp, ap, bap, rmax, nrow, low, high, t, _i, N, ierr   (scratch)
   MatSeqXAIJReallocateAIJ() may rely on further caller-scope names - check its definition
   before removing any local from a function that expands this macro.

   Steps: narrow the block row with a bisection until at most 3 candidates remain, scan them
   linearly, update in place if the block column exists, otherwise (subject to b->nonew) grow
   the row if needed, shift the tail up by one block and store the value in a zeroed block.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = (row)/bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = (col)/bs; \
    ridx = (row) % bs; \
    cidx = (col) % bs; \
    low  = 0; \
    high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block already present: blocks are stored column-major */ \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
176ac7a638eSSatish Balay 
177b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
17857b952d6SSatish Balay {
17957b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
18093fea6afSBarry Smith   MatScalar      value;
181ace3abfcSBarry Smith   PetscBool      roworiented = baij->roworiented;
182dfbe8321SBarry Smith   PetscErrorCode ierr;
183b24ad042SBarry Smith   PetscInt       i,j,row,col;
184d0f46423SBarry Smith   PetscInt       rstart_orig=mat->rmap->rstart;
185d0f46423SBarry Smith   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
186d0f46423SBarry Smith   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
18757b952d6SSatish Balay 
188eada6651SSatish Balay   /* Some Variables required in the macro */
18980c1aa95SSatish Balay   Mat         A     = baij->A;
19080c1aa95SSatish Balay   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
191b24ad042SBarry Smith   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
1923eda8832SBarry Smith   MatScalar   *aa   =a->a;
193ac7a638eSSatish Balay 
194ac7a638eSSatish Balay   Mat         B     = baij->B;
195ac7a638eSSatish Balay   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
196b24ad042SBarry Smith   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
1973eda8832SBarry Smith   MatScalar   *ba   =b->a;
198ac7a638eSSatish Balay 
199b24ad042SBarry Smith   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
200b24ad042SBarry Smith   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
2013eda8832SBarry Smith   MatScalar *ap,*bap;
20280c1aa95SSatish Balay 
203d64ed03dSBarry Smith   PetscFunctionBegin;
20457b952d6SSatish Balay   for (i=0; i<m; i++) {
2055ef9f2a5SBarry Smith     if (im[i] < 0) continue;
206c1758adbSBarry Smith     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
20757b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
20857b952d6SSatish Balay       row = im[i] - rstart_orig;
20957b952d6SSatish Balay       for (j=0; j<n; j++) {
21057b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig) {
21157b952d6SSatish Balay           col = in[j] - cstart_orig;
212db4deed7SKarl Rupp           if (roworiented) value = v[i*n+j];
213db4deed7SKarl Rupp           else             value = v[i+j*m];
214d40312a9SBarry Smith           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
21573959e64SBarry Smith         } else if (in[j] < 0) continue;
216c1758adbSBarry Smith         else if (PetscUnlikely(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
2179245e749SBarry Smith         else {
21857b952d6SSatish Balay           if (mat->was_assembled) {
219905e6a2fSBarry Smith             if (!baij->colmap) {
220ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
221905e6a2fSBarry Smith             }
222aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
2230f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
224bba1ac68SSatish Balay             col  = col - 1;
22548e59246SSatish Balay #else
226bba1ac68SSatish Balay             col = baij->colmap[in[j]/bs] - 1;
22748e59246SSatish Balay #endif
228c9ef50b2SBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
229ab9863d7SBarry Smith               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
2308295de27SSatish Balay               col  =  in[j];
2319bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2329bf004c3SSatish Balay               B    = baij->B;
2339bf004c3SSatish Balay               b    = (Mat_SeqBAIJ*)(B)->data;
2349bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2359bf004c3SSatish Balay               ba   =b->a;
236c9ef50b2SBarry Smith             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
237c9ef50b2SBarry Smith             else col += in[j]%bs;
2388295de27SSatish Balay           } else col = in[j];
239db4deed7SKarl Rupp           if (roworiented) value = v[i*n+j];
240db4deed7SKarl Rupp           else             value = v[i+j*m];
241d40312a9SBarry Smith           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
24290da58bdSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
24357b952d6SSatish Balay         }
24457b952d6SSatish Balay       }
245d64ed03dSBarry Smith     } else {
2464cb17eb5SBarry Smith       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
24790f02eecSBarry Smith       if (!baij->donotstash) {
2485080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
249ff2fd236SBarry Smith         if (roworiented) {
250b400d20cSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
251ff2fd236SBarry Smith         } else {
252b400d20cSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
25357b952d6SSatish Balay         }
25457b952d6SSatish Balay       }
25557b952d6SSatish Balay     }
25690f02eecSBarry Smith   }
2573a40ed3dSBarry Smith   PetscFunctionReturn(0);
25857b952d6SSatish Balay }
25957b952d6SSatish Balay 
2608ab52850SBarry Smith PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
261880c6e6aSBarry Smith {
262880c6e6aSBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
2638ab52850SBarry Smith   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
264880c6e6aSBarry Smith   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
265880c6e6aSBarry Smith   PetscErrorCode    ierr;
2668ab52850SBarry Smith   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
267880c6e6aSBarry Smith   PetscBool         roworiented=a->roworiented;
268880c6e6aSBarry Smith   const PetscScalar *value     = v;
269880c6e6aSBarry Smith   MatScalar         *ap,*aa = a->a,*bap;
270880c6e6aSBarry Smith 
271880c6e6aSBarry Smith   PetscFunctionBegin;
272880c6e6aSBarry Smith   rp   = aj + ai[row];
273880c6e6aSBarry Smith   ap   = aa + bs2*ai[row];
274880c6e6aSBarry Smith   rmax = imax[row];
275880c6e6aSBarry Smith   nrow = ailen[row];
2768ab52850SBarry Smith   value = v;
2778ab52850SBarry Smith   low = 0;
2788ab52850SBarry Smith   high = nrow;
279880c6e6aSBarry Smith   while (high-low > 7) {
280880c6e6aSBarry Smith     t = (low+high)/2;
281880c6e6aSBarry Smith     if (rp[t] > col) high = t;
282880c6e6aSBarry Smith     else             low  = t;
283880c6e6aSBarry Smith   }
284880c6e6aSBarry Smith   for (i=low; i<high; i++) {
285880c6e6aSBarry Smith     if (rp[i] > col) break;
286880c6e6aSBarry Smith     if (rp[i] == col) {
287880c6e6aSBarry Smith       bap = ap +  bs2*i;
288880c6e6aSBarry Smith       if (roworiented) {
289880c6e6aSBarry Smith         if (is == ADD_VALUES) {
2908ab52850SBarry Smith           for (ii=0; ii<bs; ii++) {
291880c6e6aSBarry Smith             for (jj=ii; jj<bs2; jj+=bs) {
292880c6e6aSBarry Smith               bap[jj] += *value++;
293880c6e6aSBarry Smith             }
294880c6e6aSBarry Smith           }
295880c6e6aSBarry Smith         } else {
2968ab52850SBarry Smith           for (ii=0; ii<bs; ii++) {
297880c6e6aSBarry Smith             for (jj=ii; jj<bs2; jj+=bs) {
298880c6e6aSBarry Smith               bap[jj] = *value++;
299880c6e6aSBarry Smith             }
300880c6e6aSBarry Smith           }
301880c6e6aSBarry Smith         }
302880c6e6aSBarry Smith       } else {
303880c6e6aSBarry Smith         if (is == ADD_VALUES) {
3048ab52850SBarry Smith           for (ii=0; ii<bs; ii++,value+=bs) {
305880c6e6aSBarry Smith             for (jj=0; jj<bs; jj++) {
306880c6e6aSBarry Smith               bap[jj] += value[jj];
307880c6e6aSBarry Smith             }
308880c6e6aSBarry Smith             bap += bs;
309880c6e6aSBarry Smith           }
310880c6e6aSBarry Smith         } else {
3118ab52850SBarry Smith           for (ii=0; ii<bs; ii++,value+=bs) {
312880c6e6aSBarry Smith             for (jj=0; jj<bs; jj++) {
313880c6e6aSBarry Smith               bap[jj]  = value[jj];
314880c6e6aSBarry Smith             }
315880c6e6aSBarry Smith             bap += bs;
316880c6e6aSBarry Smith           }
317880c6e6aSBarry Smith         }
318880c6e6aSBarry Smith       }
319880c6e6aSBarry Smith       goto noinsert2;
320880c6e6aSBarry Smith     }
321880c6e6aSBarry Smith   }
322880c6e6aSBarry Smith   if (nonew == 1) goto noinsert2;
323880c6e6aSBarry Smith   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
324880c6e6aSBarry Smith   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
325880c6e6aSBarry Smith   N = nrow++ - 1; high++;
326880c6e6aSBarry Smith   /* shift up all the later entries in this row */
327580bdb30SBarry Smith   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
328580bdb30SBarry Smith   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
329880c6e6aSBarry Smith   rp[i] = col;
330880c6e6aSBarry Smith   bap   = ap +  bs2*i;
331880c6e6aSBarry Smith   if (roworiented) {
3328ab52850SBarry Smith     for (ii=0; ii<bs; ii++) {
333880c6e6aSBarry Smith       for (jj=ii; jj<bs2; jj+=bs) {
334880c6e6aSBarry Smith         bap[jj] = *value++;
335880c6e6aSBarry Smith       }
336880c6e6aSBarry Smith     }
337880c6e6aSBarry Smith   } else {
3388ab52850SBarry Smith     for (ii=0; ii<bs; ii++) {
339880c6e6aSBarry Smith       for (jj=0; jj<bs; jj++) {
340880c6e6aSBarry Smith         *bap++ = *value++;
341880c6e6aSBarry Smith       }
342880c6e6aSBarry Smith     }
343880c6e6aSBarry Smith   }
344880c6e6aSBarry Smith   noinsert2:;
345880c6e6aSBarry Smith   ailen[row] = nrow;
346880c6e6aSBarry Smith   PetscFunctionReturn(0);
347880c6e6aSBarry Smith }
348880c6e6aSBarry Smith 
3498ab52850SBarry Smith /*
3508ab52850SBarry Smith     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
3518ab52850SBarry Smith     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
3528ab52850SBarry Smith */
35397e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
354ab26458aSBarry Smith {
355ab26458aSBarry Smith   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
356dd6ea824SBarry Smith   const PetscScalar *value;
357f15d580aSBarry Smith   MatScalar         *barray     = baij->barray;
358ace3abfcSBarry Smith   PetscBool         roworiented = baij->roworiented;
359dfbe8321SBarry Smith   PetscErrorCode    ierr;
360899cda47SBarry Smith   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
361899cda47SBarry Smith   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
362d0f46423SBarry Smith   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
363ab26458aSBarry Smith 
364b16ae2b1SBarry Smith   PetscFunctionBegin;
36530793edcSSatish Balay   if (!barray) {
366785e854fSJed Brown     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
36782502324SSatish Balay     baij->barray = barray;
36830793edcSSatish Balay   }
36930793edcSSatish Balay 
37026fbe8dcSKarl Rupp   if (roworiented) stepval = (n-1)*bs;
37126fbe8dcSKarl Rupp   else stepval = (m-1)*bs;
37226fbe8dcSKarl Rupp 
373ab26458aSBarry Smith   for (i=0; i<m; i++) {
3745ef9f2a5SBarry Smith     if (im[i] < 0) continue;
375cf9c20a2SJed Brown     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
376ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
377ab26458aSBarry Smith       row = im[i] - rstart;
378ab26458aSBarry Smith       for (j=0; j<n; j++) {
37915b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
38015b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
381f15d580aSBarry Smith           barray = (MatScalar*)v + i*bs2;
38215b57d14SSatish Balay         } else if ((!roworiented) && (m == 1)) {
383f15d580aSBarry Smith           barray = (MatScalar*)v + j*bs2;
38415b57d14SSatish Balay         } else { /* Here a copy is required */
385ab26458aSBarry Smith           if (roworiented) {
38653ef36baSBarry Smith             value = v + (i*(stepval+bs) + j)*bs;
387ab26458aSBarry Smith           } else {
38853ef36baSBarry Smith             value = v + (j*(stepval+bs) + i)*bs;
389abef11f7SSatish Balay           }
39053ef36baSBarry Smith           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
39126fbe8dcSKarl Rupp             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
39253ef36baSBarry Smith             barray += bs;
39347513183SBarry Smith           }
39430793edcSSatish Balay           barray -= bs2;
39515b57d14SSatish Balay         }
396abef11f7SSatish Balay 
397abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend) {
398abef11f7SSatish Balay           col  = in[j] - cstart;
3998ab52850SBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
40026fbe8dcSKarl Rupp         } else if (in[j] < 0) continue;
4019245e749SBarry Smith         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
4029245e749SBarry Smith         else {
403ab26458aSBarry Smith           if (mat->was_assembled) {
404ab26458aSBarry Smith             if (!baij->colmap) {
405ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
406ab26458aSBarry Smith             }
407a5eb4965SSatish Balay 
4082515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
409aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
410b24ad042SBarry Smith             { PetscInt data;
4110f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
412e32f2f54SBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
413fa46199cSSatish Balay             }
41448e59246SSatish Balay #else
415e32f2f54SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
416a5eb4965SSatish Balay #endif
41748e59246SSatish Balay #endif
418aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
4190f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
420fa46199cSSatish Balay             col  = (col - 1)/bs;
42148e59246SSatish Balay #else
422a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
42348e59246SSatish Balay #endif
4240e9bae81SBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
425ab9863d7SBarry Smith               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
426ab26458aSBarry Smith               col  =  in[j];
427bb003d0fSBarry Smith             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
428db4deed7SKarl Rupp           } else col = in[j];
4298ab52850SBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
430ab26458aSBarry Smith         }
431ab26458aSBarry Smith       }
432d64ed03dSBarry Smith     } else {
433bb003d0fSBarry Smith       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
434ab26458aSBarry Smith       if (!baij->donotstash) {
435ff2fd236SBarry Smith         if (roworiented) {
4366fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
437ff2fd236SBarry Smith         } else {
4386fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
439ff2fd236SBarry Smith         }
440abef11f7SSatish Balay       }
441ab26458aSBarry Smith     }
442ab26458aSBarry Smith   }
4433a40ed3dSBarry Smith   PetscFunctionReturn(0);
444ab26458aSBarry Smith }
4456fa18ffdSBarry Smith 
/*
   Multiplicative hash: HASH(size,key,tmp) is the integer part of size * frac(key*HASH_KEY),
   i.e. (PetscInt)(size*fmod(key*HASH_KEY,1)) written without the fmod() call.
   tmp is a caller-supplied floating point scratch variable that the macro overwrites.
*/
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
45097e5c40aSBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
4510bdbc534SSatish Balay {
4520bdbc534SSatish Balay   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
453ace3abfcSBarry Smith   PetscBool      roworiented = baij->roworiented;
454dfbe8321SBarry Smith   PetscErrorCode ierr;
455b24ad042SBarry Smith   PetscInt       i,j,row,col;
456d0f46423SBarry Smith   PetscInt       rstart_orig=mat->rmap->rstart;
457d0f46423SBarry Smith   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
458d0f46423SBarry Smith   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
459329f5518SBarry Smith   PetscReal      tmp;
4603eda8832SBarry Smith   MatScalar      **HD = baij->hd,value;
461b24ad042SBarry Smith   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4620bdbc534SSatish Balay 
4630bdbc534SSatish Balay   PetscFunctionBegin;
4640bdbc534SSatish Balay   for (i=0; i<m; i++) {
46576bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) {
466e32f2f54SBarry Smith       if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
467e32f2f54SBarry Smith       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
46876bd3646SJed Brown     }
4690bdbc534SSatish Balay     row = im[i];
470c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
4710bdbc534SSatish Balay       for (j=0; j<n; j++) {
4720bdbc534SSatish Balay         col = in[j];
473db4deed7SKarl Rupp         if (roworiented) value = v[i*n+j];
474db4deed7SKarl Rupp         else             value = v[i+j*m];
475b24ad042SBarry Smith         /* Look up PetscInto the Hash Table */
476c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
477c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4780bdbc534SSatish Balay 
479c2760754SSatish Balay         idx = h1;
48076bd3646SJed Brown         if (PetscDefined(USE_DEBUG)) {
481187ce0cbSSatish Balay           insert_ct++;
482187ce0cbSSatish Balay           total_ct++;
483187ce0cbSSatish Balay           if (HT[idx] != key) {
484187ce0cbSSatish Balay             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
485187ce0cbSSatish Balay             if (idx == size) {
486187ce0cbSSatish Balay               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
487f23aa3ddSBarry Smith               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
488187ce0cbSSatish Balay             }
489187ce0cbSSatish Balay           }
49076bd3646SJed Brown         } else if (HT[idx] != key) {
491c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
492c2760754SSatish Balay           if (idx == size) {
493c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
494f23aa3ddSBarry Smith             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
495c2760754SSatish Balay           }
496c2760754SSatish Balay         }
497c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
498c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
499c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
5000bdbc534SSatish Balay       }
50126fbe8dcSKarl Rupp     } else if (!baij->donotstash) {
502ff2fd236SBarry Smith       if (roworiented) {
503b400d20cSBarry Smith         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
504ff2fd236SBarry Smith       } else {
505b400d20cSBarry Smith         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
5060bdbc534SSatish Balay       }
5070bdbc534SSatish Balay     }
5080bdbc534SSatish Balay   }
50976bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
510abf3b562SBarry Smith     baij->ht_total_ct  += total_ct;
511abf3b562SBarry Smith     baij->ht_insert_ct += insert_ct;
51276bd3646SJed Brown   }
5130bdbc534SSatish Balay   PetscFunctionReturn(0);
5140bdbc534SSatish Balay }
5150bdbc534SSatish Balay 
51697e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
5170bdbc534SSatish Balay {
5180bdbc534SSatish Balay   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
519ace3abfcSBarry Smith   PetscBool         roworiented = baij->roworiented;
520dfbe8321SBarry Smith   PetscErrorCode    ierr;
521b24ad042SBarry Smith   PetscInt          i,j,ii,jj,row,col;
522899cda47SBarry Smith   PetscInt          rstart=baij->rstartbs;
523d0f46423SBarry Smith   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
524b24ad042SBarry Smith   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
525329f5518SBarry Smith   PetscReal         tmp;
5263eda8832SBarry Smith   MatScalar         **HD = baij->hd,*baij_a;
527dd6ea824SBarry Smith   const PetscScalar *v_t,*value;
528b24ad042SBarry Smith   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5290bdbc534SSatish Balay 
530d0a41580SSatish Balay   PetscFunctionBegin;
53126fbe8dcSKarl Rupp   if (roworiented) stepval = (n-1)*bs;
53226fbe8dcSKarl Rupp   else stepval = (m-1)*bs;
53326fbe8dcSKarl Rupp 
5340bdbc534SSatish Balay   for (i=0; i<m; i++) {
53576bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) {
536e32f2f54SBarry Smith       if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
537e32f2f54SBarry Smith       if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
53876bd3646SJed Brown     }
5390bdbc534SSatish Balay     row = im[i];
540ab715e2cSSatish Balay     v_t = v + i*nbs2;
541c2760754SSatish Balay     if (row >= rstart && row < rend) {
5420bdbc534SSatish Balay       for (j=0; j<n; j++) {
5430bdbc534SSatish Balay         col = in[j];
5440bdbc534SSatish Balay 
5450bdbc534SSatish Balay         /* Look up into the Hash Table */
546c2760754SSatish Balay         key = row*Nbs+col+1;
547c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5480bdbc534SSatish Balay 
549c2760754SSatish Balay         idx = h1;
55076bd3646SJed Brown         if (PetscDefined(USE_DEBUG)) {
551187ce0cbSSatish Balay           total_ct++;
552187ce0cbSSatish Balay           insert_ct++;
553187ce0cbSSatish Balay           if (HT[idx] != key) {
554187ce0cbSSatish Balay             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
555187ce0cbSSatish Balay             if (idx == size) {
556187ce0cbSSatish Balay               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
557f23aa3ddSBarry Smith               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
558187ce0cbSSatish Balay             }
559187ce0cbSSatish Balay           }
56076bd3646SJed Brown         } else if (HT[idx] != key) {
561c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
562c2760754SSatish Balay           if (idx == size) {
563c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
564f23aa3ddSBarry Smith             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
565c2760754SSatish Balay           }
566c2760754SSatish Balay         }
567c2760754SSatish Balay         baij_a = HD[idx];
5680bdbc534SSatish Balay         if (roworiented) {
569c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
570187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
571187ce0cbSSatish Balay           value = v_t;
572187ce0cbSSatish Balay           v_t  += bs;
573fef45726SSatish Balay           if (addv == ADD_VALUES) {
574c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
575c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
576fef45726SSatish Balay                 baij_a[jj] += *value++;
577b4cc0f5aSSatish Balay               }
578b4cc0f5aSSatish Balay             }
579fef45726SSatish Balay           } else {
580c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
581c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
582fef45726SSatish Balay                 baij_a[jj] = *value++;
583fef45726SSatish Balay               }
584fef45726SSatish Balay             }
585fef45726SSatish Balay           }
5860bdbc534SSatish Balay         } else {
5870bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
588fef45726SSatish Balay           if (addv == ADD_VALUES) {
589b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
5900bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
591fef45726SSatish Balay                 baij_a[jj] += *value++;
592fef45726SSatish Balay               }
593fef45726SSatish Balay             }
594fef45726SSatish Balay           } else {
595fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
596fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
597fef45726SSatish Balay                 baij_a[jj] = *value++;
598fef45726SSatish Balay               }
599b4cc0f5aSSatish Balay             }
6000bdbc534SSatish Balay           }
6010bdbc534SSatish Balay         }
6020bdbc534SSatish Balay       }
6030bdbc534SSatish Balay     } else {
6040bdbc534SSatish Balay       if (!baij->donotstash) {
6050bdbc534SSatish Balay         if (roworiented) {
6068798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6070bdbc534SSatish Balay         } else {
6088798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6090bdbc534SSatish Balay         }
6100bdbc534SSatish Balay       }
6110bdbc534SSatish Balay     }
6120bdbc534SSatish Balay   }
61376bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
614abf3b562SBarry Smith     baij->ht_total_ct  += total_ct;
615abf3b562SBarry Smith     baij->ht_insert_ct += insert_ct;
61676bd3646SJed Brown   }
6170bdbc534SSatish Balay   PetscFunctionReturn(0);
6180bdbc534SSatish Balay }
619133cdb44SSatish Balay 
620b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
621d6de1c52SSatish Balay {
622d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
6236849ba73SBarry Smith   PetscErrorCode ierr;
624d0f46423SBarry Smith   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
625d0f46423SBarry Smith   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
626d6de1c52SSatish Balay 
627133cdb44SSatish Balay   PetscFunctionBegin;
628d6de1c52SSatish Balay   for (i=0; i<m; i++) {
629e32f2f54SBarry Smith     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
630e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
631d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
632d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
633d6de1c52SSatish Balay       for (j=0; j<n; j++) {
634e32f2f54SBarry Smith         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
635e32f2f54SBarry Smith         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
636d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend) {
637d6de1c52SSatish Balay           col  = idxn[j] - bscstart;
63898dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
639d64ed03dSBarry Smith         } else {
640905e6a2fSBarry Smith           if (!baij->colmap) {
641ab9863d7SBarry Smith             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
642905e6a2fSBarry Smith           }
643aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
6440f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
645fa46199cSSatish Balay           data--;
64648e59246SSatish Balay #else
64748e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
64848e59246SSatish Balay #endif
64948e59246SSatish Balay           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
650d9d09a02SSatish Balay           else {
65148e59246SSatish Balay             col  = data + idxn[j]%bs;
65298dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
653d6de1c52SSatish Balay           }
654d6de1c52SSatish Balay         }
655d6de1c52SSatish Balay       }
656f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
657d6de1c52SSatish Balay   }
6583a40ed3dSBarry Smith   PetscFunctionReturn(0);
659d6de1c52SSatish Balay }
660d6de1c52SSatish Balay 
661dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
662d6de1c52SSatish Balay {
663d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
664d6de1c52SSatish Balay   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
665dfbe8321SBarry Smith   PetscErrorCode ierr;
666d0f46423SBarry Smith   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
667329f5518SBarry Smith   PetscReal      sum = 0.0;
6683eda8832SBarry Smith   MatScalar      *v;
669d6de1c52SSatish Balay 
670d64ed03dSBarry Smith   PetscFunctionBegin;
671d6de1c52SSatish Balay   if (baij->size == 1) {
672064f8208SBarry Smith     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
673d6de1c52SSatish Balay   } else {
674d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
675d6de1c52SSatish Balay       v  = amat->a;
6768a62d963SHong Zhang       nz = amat->nz*bs2;
6778a62d963SHong Zhang       for (i=0; i<nz; i++) {
678329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
679d6de1c52SSatish Balay       }
680d6de1c52SSatish Balay       v  = bmat->a;
6818a62d963SHong Zhang       nz = bmat->nz*bs2;
6828a62d963SHong Zhang       for (i=0; i<nz; i++) {
683329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
684d6de1c52SSatish Balay       }
685820f2d46SBarry Smith       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
6868f1a2a5eSBarry Smith       *nrm = PetscSqrtReal(*nrm);
6878a62d963SHong Zhang     } else if (type == NORM_1) { /* max column sum */
6888a62d963SHong Zhang       PetscReal *tmp,*tmp2;
689899cda47SBarry Smith       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
6908f8f2f0dSBarry Smith       ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
691857a15f1SBarry Smith       ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
6928a62d963SHong Zhang       v    = amat->a; jj = amat->j;
6938a62d963SHong Zhang       for (i=0; i<amat->nz; i++) {
6948a62d963SHong Zhang         for (j=0; j<bs; j++) {
6958a62d963SHong Zhang           col = bs*(cstart + *jj) + j; /* column index */
6968a62d963SHong Zhang           for (row=0; row<bs; row++) {
6978a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v);  v++;
6988a62d963SHong Zhang           }
6998a62d963SHong Zhang         }
7008a62d963SHong Zhang         jj++;
7018a62d963SHong Zhang       }
7028a62d963SHong Zhang       v = bmat->a; jj = bmat->j;
7038a62d963SHong Zhang       for (i=0; i<bmat->nz; i++) {
7048a62d963SHong Zhang         for (j=0; j<bs; j++) {
7058a62d963SHong Zhang           col = bs*garray[*jj] + j;
7068a62d963SHong Zhang           for (row=0; row<bs; row++) {
7078a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v); v++;
7088a62d963SHong Zhang           }
7098a62d963SHong Zhang         }
7108a62d963SHong Zhang         jj++;
7118a62d963SHong Zhang       }
712820f2d46SBarry Smith       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
7138a62d963SHong Zhang       *nrm = 0.0;
714d0f46423SBarry Smith       for (j=0; j<mat->cmap->N; j++) {
7158a62d963SHong Zhang         if (tmp2[j] > *nrm) *nrm = tmp2[j];
7168a62d963SHong Zhang       }
717857a15f1SBarry Smith       ierr = PetscFree(tmp);CHKERRQ(ierr);
718857a15f1SBarry Smith       ierr = PetscFree(tmp2);CHKERRQ(ierr);
7198a62d963SHong Zhang     } else if (type == NORM_INFINITY) { /* max row sum */
720577dd1f9SKris Buschelman       PetscReal *sums;
721785e854fSJed Brown       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
7228a62d963SHong Zhang       sum  = 0.0;
7238a62d963SHong Zhang       for (j=0; j<amat->mbs; j++) {
7248a62d963SHong Zhang         for (row=0; row<bs; row++) sums[row] = 0.0;
7258a62d963SHong Zhang         v  = amat->a + bs2*amat->i[j];
7268a62d963SHong Zhang         nz = amat->i[j+1]-amat->i[j];
7278a62d963SHong Zhang         for (i=0; i<nz; i++) {
7288a62d963SHong Zhang           for (col=0; col<bs; col++) {
7298a62d963SHong Zhang             for (row=0; row<bs; row++) {
7308a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
7318a62d963SHong Zhang             }
7328a62d963SHong Zhang           }
7338a62d963SHong Zhang         }
7348a62d963SHong Zhang         v  = bmat->a + bs2*bmat->i[j];
7358a62d963SHong Zhang         nz = bmat->i[j+1]-bmat->i[j];
7368a62d963SHong Zhang         for (i=0; i<nz; i++) {
7378a62d963SHong Zhang           for (col=0; col<bs; col++) {
7388a62d963SHong Zhang             for (row=0; row<bs; row++) {
7398a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
7408a62d963SHong Zhang             }
7418a62d963SHong Zhang           }
7428a62d963SHong Zhang         }
7438a62d963SHong Zhang         for (row=0; row<bs; row++) {
7448a62d963SHong Zhang           if (sums[row] > sum) sum = sums[row];
7458a62d963SHong Zhang         }
7468a62d963SHong Zhang       }
747820f2d46SBarry Smith       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
748577dd1f9SKris Buschelman       ierr = PetscFree(sums);CHKERRQ(ierr);
749ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
750d64ed03dSBarry Smith   }
7513a40ed3dSBarry Smith   PetscFunctionReturn(0);
752d6de1c52SSatish Balay }
75357b952d6SSatish Balay 
754fef45726SSatish Balay /*
755fef45726SSatish Balay   Creates the hash table, and sets the table
756fef45726SSatish Balay   This table is created only once.
757fef45726SSatish Balay   If new entried need to be added to the matrix
758fef45726SSatish Balay   then the hash table has to be destroyed and
759fef45726SSatish Balay   recreated.
760fef45726SSatish Balay */
761dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
762596b8d2eSBarry Smith {
763596b8d2eSBarry Smith   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
764596b8d2eSBarry Smith   Mat            A     = baij->A,B=baij->B;
765596b8d2eSBarry Smith   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
766b24ad042SBarry Smith   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
7676849ba73SBarry Smith   PetscErrorCode ierr;
768fca92195SBarry Smith   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
769899cda47SBarry Smith   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
770b24ad042SBarry Smith   PetscInt       *HT,key;
7713eda8832SBarry Smith   MatScalar      **HD;
772329f5518SBarry Smith   PetscReal      tmp;
7736cf91177SBarry Smith #if defined(PETSC_USE_INFO)
774b24ad042SBarry Smith   PetscInt ct=0,max=0;
7754a15367fSSatish Balay #endif
776fef45726SSatish Balay 
777d64ed03dSBarry Smith   PetscFunctionBegin;
778fca92195SBarry Smith   if (baij->ht) PetscFunctionReturn(0);
779fef45726SSatish Balay 
780fca92195SBarry Smith   baij->ht_size = (PetscInt)(factor*nz);
781fca92195SBarry Smith   ht_size       = baij->ht_size;
7820bdbc534SSatish Balay 
783fef45726SSatish Balay   /* Allocate Memory for Hash Table */
7841795a4d1SJed Brown   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
785b9e4cc15SSatish Balay   HD   = baij->hd;
786a07cd24cSSatish Balay   HT   = baij->ht;
787b9e4cc15SSatish Balay 
788596b8d2eSBarry Smith   /* Loop Over A */
7890bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
790596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
7910bdbc534SSatish Balay       row = i+rstart;
7920bdbc534SSatish Balay       col = aj[j]+cstart;
793596b8d2eSBarry Smith 
794187ce0cbSSatish Balay       key = row*Nbs + col + 1;
795fca92195SBarry Smith       h1  = HASH(ht_size,key,tmp);
796fca92195SBarry Smith       for (k=0; k<ht_size; k++) {
797fca92195SBarry Smith         if (!HT[(h1+k)%ht_size]) {
798fca92195SBarry Smith           HT[(h1+k)%ht_size] = key;
799fca92195SBarry Smith           HD[(h1+k)%ht_size] = a->a + j*bs2;
800596b8d2eSBarry Smith           break;
8016cf91177SBarry Smith #if defined(PETSC_USE_INFO)
802187ce0cbSSatish Balay         } else {
803187ce0cbSSatish Balay           ct++;
804187ce0cbSSatish Balay #endif
805596b8d2eSBarry Smith         }
806187ce0cbSSatish Balay       }
8076cf91177SBarry Smith #if defined(PETSC_USE_INFO)
808187ce0cbSSatish Balay       if (k> max) max = k;
809187ce0cbSSatish Balay #endif
810596b8d2eSBarry Smith     }
811596b8d2eSBarry Smith   }
812596b8d2eSBarry Smith   /* Loop Over B */
8130bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
814596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
8150bdbc534SSatish Balay       row = i+rstart;
8160bdbc534SSatish Balay       col = garray[bj[j]];
817187ce0cbSSatish Balay       key = row*Nbs + col + 1;
818fca92195SBarry Smith       h1  = HASH(ht_size,key,tmp);
819fca92195SBarry Smith       for (k=0; k<ht_size; k++) {
820fca92195SBarry Smith         if (!HT[(h1+k)%ht_size]) {
821fca92195SBarry Smith           HT[(h1+k)%ht_size] = key;
822fca92195SBarry Smith           HD[(h1+k)%ht_size] = b->a + j*bs2;
823596b8d2eSBarry Smith           break;
8246cf91177SBarry Smith #if defined(PETSC_USE_INFO)
825187ce0cbSSatish Balay         } else {
826187ce0cbSSatish Balay           ct++;
827187ce0cbSSatish Balay #endif
828596b8d2eSBarry Smith         }
829187ce0cbSSatish Balay       }
8306cf91177SBarry Smith #if defined(PETSC_USE_INFO)
831187ce0cbSSatish Balay       if (k> max) max = k;
832187ce0cbSSatish Balay #endif
833596b8d2eSBarry Smith     }
834596b8d2eSBarry Smith   }
835596b8d2eSBarry Smith 
836596b8d2eSBarry Smith   /* Print Summary */
8376cf91177SBarry Smith #if defined(PETSC_USE_INFO)
838fca92195SBarry Smith   for (i=0,j=0; i<ht_size; i++) {
83926fbe8dcSKarl Rupp     if (HT[i]) j++;
840c38d4ed2SBarry Smith   }
8411e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
842187ce0cbSSatish Balay #endif
8433a40ed3dSBarry Smith   PetscFunctionReturn(0);
844596b8d2eSBarry Smith }
84557b952d6SSatish Balay 
846dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
847bbb85fb3SSatish Balay {
848bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
849dfbe8321SBarry Smith   PetscErrorCode ierr;
850b24ad042SBarry Smith   PetscInt       nstash,reallocs;
851bbb85fb3SSatish Balay 
852bbb85fb3SSatish Balay   PetscFunctionBegin;
85326fbe8dcSKarl Rupp   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
854bbb85fb3SSatish Balay 
855d0f46423SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
8561e2582c4SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
8578798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
8581e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
85946680499SSatish Balay   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
8601e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
861bbb85fb3SSatish Balay   PetscFunctionReturn(0);
862bbb85fb3SSatish Balay }
863bbb85fb3SSatish Balay 
864dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
865bbb85fb3SSatish Balay {
866bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
86791c97fd4SSatish Balay   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
8686849ba73SBarry Smith   PetscErrorCode ierr;
869b24ad042SBarry Smith   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
870e44c0bd4SBarry Smith   PetscInt       *row,*col;
871ace3abfcSBarry Smith   PetscBool      r1,r2,r3,other_disassembled;
8723eda8832SBarry Smith   MatScalar      *val;
873b24ad042SBarry Smith   PetscMPIInt    n;
874bbb85fb3SSatish Balay 
875bbb85fb3SSatish Balay   PetscFunctionBegin;
8765fd66863SKarl Rupp   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
8774cb17eb5SBarry Smith   if (!baij->donotstash && !mat->nooffprocentries) {
878a2d1c673SSatish Balay     while (1) {
8798798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
880a2d1c673SSatish Balay       if (!flg) break;
881a2d1c673SSatish Balay 
882bbb85fb3SSatish Balay       for (i=0; i<n;) {
883bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
88426fbe8dcSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
88526fbe8dcSKarl Rupp           if (row[j] != rstart) break;
88626fbe8dcSKarl Rupp         }
887bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
888bbb85fb3SSatish Balay         else       ncols = n-i;
889bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
8904b4eb8d3SJed Brown         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
891bbb85fb3SSatish Balay         i    = j;
892bbb85fb3SSatish Balay       }
893bbb85fb3SSatish Balay     }
8948798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
895a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
896a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
897a2d1c673SSatish Balay        restore the original flags */
898a2d1c673SSatish Balay     r1 = baij->roworiented;
899a2d1c673SSatish Balay     r2 = a->roworiented;
90091c97fd4SSatish Balay     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
90126fbe8dcSKarl Rupp 
9027c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
9037c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
90426fbe8dcSKarl Rupp 
90591c97fd4SSatish Balay     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
906a2d1c673SSatish Balay     while (1) {
9078798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
908a2d1c673SSatish Balay       if (!flg) break;
909a2d1c673SSatish Balay 
910a2d1c673SSatish Balay       for (i=0; i<n;) {
911a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
91226fbe8dcSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
91326fbe8dcSKarl Rupp           if (row[j] != rstart) break;
91426fbe8dcSKarl Rupp         }
915a2d1c673SSatish Balay         if (j < n) ncols = j-i;
916a2d1c673SSatish Balay         else       ncols = n-i;
9174b4eb8d3SJed Brown         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
918a2d1c673SSatish Balay         i    = j;
919a2d1c673SSatish Balay       }
920a2d1c673SSatish Balay     }
9218798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
92226fbe8dcSKarl Rupp 
923a2d1c673SSatish Balay     baij->roworiented = r1;
924a2d1c673SSatish Balay     a->roworiented    = r2;
92526fbe8dcSKarl Rupp 
92691c97fd4SSatish Balay     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
927bbb85fb3SSatish Balay   }
928bbb85fb3SSatish Balay 
929bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
930bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
931bbb85fb3SSatish Balay 
932bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
933bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
934bbb85fb3SSatish Balay   /*
935bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
936bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
937bbb85fb3SSatish Balay   */
938bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
939820f2d46SBarry Smith     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
940bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
941ab9863d7SBarry Smith       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
942bbb85fb3SSatish Balay     }
943bbb85fb3SSatish Balay   }
944bbb85fb3SSatish Balay 
945bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
946bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
947bbb85fb3SSatish Balay   }
948bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
949bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
950bbb85fb3SSatish Balay 
9516cf91177SBarry Smith #if defined(PETSC_USE_INFO)
952bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
953abf3b562SBarry Smith     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
95426fbe8dcSKarl Rupp 
955bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
956bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
957bbb85fb3SSatish Balay   }
958bbb85fb3SSatish Balay #endif
959bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
960bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
96126fbe8dcSKarl Rupp 
962bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
963bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
964bbb85fb3SSatish Balay   }
965bbb85fb3SSatish Balay 
966fca92195SBarry Smith   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
96726fbe8dcSKarl Rupp 
968f4259b30SLisandro Dalcin   baij->rowvalues = NULL;
9694f9cfa9eSBarry Smith 
9704f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
9714f9cfa9eSBarry Smith   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
972e56f5c9eSBarry Smith     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
973820f2d46SBarry Smith     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
974e56f5c9eSBarry Smith   }
975bbb85fb3SSatish Balay   PetscFunctionReturn(0);
976bbb85fb3SSatish Balay }
97757b952d6SSatish Balay 
9787da1fb6eSBarry Smith extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
9799804daf3SBarry Smith #include <petscdraw.h>
9806849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
98157b952d6SSatish Balay {
98257b952d6SSatish Balay   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
983dfbe8321SBarry Smith   PetscErrorCode    ierr;
9847da1fb6eSBarry Smith   PetscMPIInt       rank = baij->rank;
985d0f46423SBarry Smith   PetscInt          bs   = mat->rmap->bs;
986ace3abfcSBarry Smith   PetscBool         iascii,isdraw;
987b0a32e0cSBarry Smith   PetscViewer       sviewer;
988f3ef73ceSBarry Smith   PetscViewerFormat format;
98957b952d6SSatish Balay 
990d64ed03dSBarry Smith   PetscFunctionBegin;
991251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
992251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
99332077d6dSBarry Smith   if (iascii) {
994b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
995456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
9964e220ebcSLois Curfman McInnes       MatInfo info;
997ffc4695bSBarry Smith       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
998d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
9991575c14dSBarry Smith       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1000b1e9c6f1SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
1001b1e9c6f1SBarry Smith                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
1002d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1003e6dd01d4SJed Brown       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1004d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1005e6dd01d4SJed Brown       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1006b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
10071575c14dSBarry Smith       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
100807d81ca4SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
100957b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
10103a40ed3dSBarry Smith       PetscFunctionReturn(0);
1011fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
101277431f27SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
10133a40ed3dSBarry Smith       PetscFunctionReturn(0);
101404929863SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
101504929863SHong Zhang       PetscFunctionReturn(0);
101657b952d6SSatish Balay     }
101757b952d6SSatish Balay   }
101857b952d6SSatish Balay 
10190f5bd95cSBarry Smith   if (isdraw) {
1020b0a32e0cSBarry Smith     PetscDraw draw;
1021ace3abfcSBarry Smith     PetscBool isnull;
1022b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
102345f3bb6eSLisandro Dalcin     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
102445f3bb6eSLisandro Dalcin     if (isnull) PetscFunctionReturn(0);
102557b952d6SSatish Balay   }
102657b952d6SSatish Balay 
10277da1fb6eSBarry Smith   {
102857b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
102957b952d6SSatish Balay     Mat         A;
103057b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
1031d0f46423SBarry Smith     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
10323eda8832SBarry Smith     MatScalar   *a;
10333e219373SBarry Smith     const char  *matname;
103457b952d6SSatish Balay 
1035f204ca49SKris Buschelman     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1036f204ca49SKris Buschelman     /* Perhaps this should be the type of mat? */
1037ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
103857b952d6SSatish Balay     if (!rank) {
1039f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1040d64ed03dSBarry Smith     } else {
1041f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
104257b952d6SSatish Balay     }
1043f204ca49SKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
10440298fd71SBarry Smith     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
10452b82e772SSatish Balay     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
10463bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
104757b952d6SSatish Balay 
104857b952d6SSatish Balay     /* copy over the A part */
104957b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->A->data;
105057b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1051785e854fSJed Brown     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
105257b952d6SSatish Balay 
105357b952d6SSatish Balay     for (i=0; i<mbs; i++) {
1054899cda47SBarry Smith       rvals[0] = bs*(baij->rstartbs + i);
105526fbe8dcSKarl Rupp       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
105657b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1057899cda47SBarry Smith         col = (baij->cstartbs+aj[j])*bs;
105857b952d6SSatish Balay         for (k=0; k<bs; k++) {
105997e5c40aSBarry Smith           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1060cee3aa6bSSatish Balay           col++; a += bs;
106157b952d6SSatish Balay         }
106257b952d6SSatish Balay       }
106357b952d6SSatish Balay     }
106457b952d6SSatish Balay     /* copy over the B part */
106557b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
106657b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
106757b952d6SSatish Balay     for (i=0; i<mbs; i++) {
1068899cda47SBarry Smith       rvals[0] = bs*(baij->rstartbs + i);
106926fbe8dcSKarl Rupp       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
107057b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
107157b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
107257b952d6SSatish Balay         for (k=0; k<bs; k++) {
107397e5c40aSBarry Smith           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1074cee3aa6bSSatish Balay           col++; a += bs;
107557b952d6SSatish Balay         }
107657b952d6SSatish Balay       }
107757b952d6SSatish Balay     }
1078606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
10796d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10806d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
108155843e3eSBarry Smith     /*
108255843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1083b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
108455843e3eSBarry Smith     */
10853f08860eSBarry Smith     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1086ade3a672SBarry Smith     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
10873e219373SBarry Smith     if (!rank) {
1088ade3a672SBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
10897da1fb6eSBarry Smith       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
109057b952d6SSatish Balay     }
10913f08860eSBarry Smith     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
10921575c14dSBarry Smith     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
10936bf464f9SBarry Smith     ierr = MatDestroy(&A);CHKERRQ(ierr);
109457b952d6SSatish Balay   }
10953a40ed3dSBarry Smith   PetscFunctionReturn(0);
109657b952d6SSatish Balay }
109757b952d6SSatish Balay 
/*
   MatView_MPIBAIJ_Binary - writes the matrix to a binary viewer in four pieces,
   in this order: a 4-entry integer header, the length of every local point row,
   the global column index of every stored entry, and the stored values.

   Input Parameters:
+  mat    - matrix whose data is read as Mat_MPIBAIJ with SeqBAIJ parts A and B
-  viewer - viewer the pieces are written to

   Notes:
   Every block row is expanded into bs point rows and every stored block into
   bs entries per point row, so bs*bs*(A->nz + B->nz) scalars are emitted locally.
   Within one point row the entries are taken from B while the global block
   column is at most cs/bs, then from A, then from the rest of B.
   NOTE(review): this gives ascending column order only if B->j and garray are
   sorted - presumably guaranteed where they are built, confirm.
*/
1098618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
1099b51a4376SLisandro Dalcin PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1100660746e0SBarry Smith {
1101b51a4376SLisandro Dalcin   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
1102b51a4376SLisandro Dalcin   Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
1103b51a4376SLisandro Dalcin   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
1104b51a4376SLisandro Dalcin   const PetscInt *garray = aij->garray;
1105b51a4376SLisandro Dalcin   PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
1106b51a4376SLisandro Dalcin   PetscInt       *rowlens,*colidxs;
1107b51a4376SLisandro Dalcin   PetscScalar    *matvals;
1108660746e0SBarry Smith   PetscErrorCode ierr;
1109660746e0SBarry Smith 
1110660746e0SBarry Smith   PetscFunctionBegin;
1111b51a4376SLisandro Dalcin   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1112b51a4376SLisandro Dalcin 
1113b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1114b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1115b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1116b51a4376SLisandro Dalcin   rs = mat->rmap->rstart;
1117b51a4376SLisandro Dalcin   cs = mat->cmap->rstart;
1118b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
  /* number of scalar entries this process writes: bs*bs per stored block of A and B */
1119b51a4376SLisandro Dalcin   nz = bs*bs*(A->nz + B->nz);
1120b51a4376SLisandro Dalcin 
1121b51a4376SLisandro Dalcin   /* write matrix header */
1122660746e0SBarry Smith   header[0] = MAT_FILE_CLASSID;
1123b51a4376SLisandro Dalcin   header[1] = M; header[2] = N; header[3] = nz;
  /* on the reduction root (rank 0) header[3] becomes the sum of the local nz; on the other ranks it keeps the local nz */
1124ffc4695bSBarry Smith   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1125b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1126660746e0SBarry Smith 
1127b51a4376SLisandro Dalcin   /* fill in and store row lengths */
1128b51a4376SLisandro Dalcin   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  /* the bs point rows of block row i all get the same length: bs entries per block of A and B in that block row */
1129b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++)
1130b51a4376SLisandro Dalcin     for (j=0; j<bs; j++)
1131b51a4376SLisandro Dalcin       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
1132b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1133b51a4376SLisandro Dalcin   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1134660746e0SBarry Smith 
1135b51a4376SLisandro Dalcin   /* fill in and store column indices */
1136b51a4376SLisandro Dalcin   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1137b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++) {
    /* k runs over the bs point rows of block row i; the indices do not depend on k, so the same pattern is emitted bs times */
1138b51a4376SLisandro Dalcin     for (k=0; k<bs; k++) {
1139b51a4376SLisandro Dalcin       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        /* stop at the first B block placed after the A part; jb is reused by the trailing loop below */
1140b51a4376SLisandro Dalcin         if (garray[B->j[jb]] > cs/bs) break;
1141b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1142b51a4376SLisandro Dalcin           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1143660746e0SBarry Smith       }
      /* A stores local block columns; bs*A->j[ja] + l is shifted by cs to a global point column */
1144b51a4376SLisandro Dalcin       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1145b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1146b51a4376SLisandro Dalcin           colidxs[cnt++] = bs*A->j[ja] + l + cs;
      /* remaining B blocks, continuing from where the first B loop stopped */
1147b51a4376SLisandro Dalcin       for (; jb<B->i[i+1]; jb++)
1148b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1149b51a4376SLisandro Dalcin           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1150660746e0SBarry Smith     }
1151660746e0SBarry Smith   }
  /* consistency check: the number of emitted indices must match the nz computed from A->nz and B->nz */
1152660746e0SBarry Smith   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1153b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
1154b51a4376SLisandro Dalcin   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1155660746e0SBarry Smith 
1156b51a4376SLisandro Dalcin   /* fill in and store nonzero values */
1157b51a4376SLisandro Dalcin   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  /* same traversal as for the column indices, so values and indices line up entry by entry */
1158b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++) {
1159b51a4376SLisandro Dalcin     for (k=0; k<bs; k++) {
1160b51a4376SLisandro Dalcin       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1161b51a4376SLisandro Dalcin         if (garray[B->j[jb]] > cs/bs) break;
        /* reads entry k + bs*l of the bs*bs block jb. NOTE(review): presumably row k, column l of a column-major block - confirm */
1162b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1163b51a4376SLisandro Dalcin           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1164660746e0SBarry Smith       }
1165b51a4376SLisandro Dalcin       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1166b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1167b51a4376SLisandro Dalcin           matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
1168b51a4376SLisandro Dalcin       for (; jb<B->i[i+1]; jb++)
1169b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1170d21b9a37SPierre Jolivet           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1171660746e0SBarry Smith     }
1172b51a4376SLisandro Dalcin   }
1173b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
1174b51a4376SLisandro Dalcin   ierr = PetscFree(matvals);CHKERRQ(ierr);
1175660746e0SBarry Smith 
1176b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
1177b51a4376SLisandro Dalcin   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1178660746e0SBarry Smith   PetscFunctionReturn(0);
1179660746e0SBarry Smith }
1180660746e0SBarry Smith 
1181dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
118257b952d6SSatish Balay {
1183dfbe8321SBarry Smith   PetscErrorCode ierr;
1184ace3abfcSBarry Smith   PetscBool      iascii,isdraw,issocket,isbinary;
118557b952d6SSatish Balay 
1186d64ed03dSBarry Smith   PetscFunctionBegin;
1187251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1188251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1189251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1190251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1191660746e0SBarry Smith   if (iascii || isdraw || issocket) {
11927b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1193660746e0SBarry Smith   } else if (isbinary) {
1194660746e0SBarry Smith     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
119557b952d6SSatish Balay   }
11963a40ed3dSBarry Smith   PetscFunctionReturn(0);
119757b952d6SSatish Balay }
119857b952d6SSatish Balay 
1199dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
120079bdfe76SSatish Balay {
120179bdfe76SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1202dfbe8321SBarry Smith   PetscErrorCode ierr;
120379bdfe76SSatish Balay 
1204d64ed03dSBarry Smith   PetscFunctionBegin;
1205aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1206d0f46423SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
120779bdfe76SSatish Balay #endif
12088798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
12098798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
12106bf464f9SBarry Smith   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
12116bf464f9SBarry Smith   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1212aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
12136bc0bbbfSBarry Smith   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
121448e59246SSatish Balay #else
121505b42c5fSBarry Smith   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
121648e59246SSatish Balay #endif
121705b42c5fSBarry Smith   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
12186bf464f9SBarry Smith   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
12196bf464f9SBarry Smith   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1220fca92195SBarry Smith   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
122105b42c5fSBarry Smith   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1222fca92195SBarry Smith   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1223899cda47SBarry Smith   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1224bf0cc555SLisandro Dalcin   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1225901853e0SKris Buschelman 
1226f4259b30SLisandro Dalcin   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1227bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1228bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1229bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1230bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1231bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1232bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1233bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1234bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
12357ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
12367ea3e4caSstefano_zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
12377ea3e4caSstefano_zampini #endif
1238c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
12393a40ed3dSBarry Smith   PetscFunctionReturn(0);
124079bdfe76SSatish Balay }
124179bdfe76SSatish Balay 
1242dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1243cee3aa6bSSatish Balay {
1244cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1245dfbe8321SBarry Smith   PetscErrorCode ierr;
1246b24ad042SBarry Smith   PetscInt       nt;
1247cee3aa6bSSatish Balay 
1248d64ed03dSBarry Smith   PetscFunctionBegin;
1249e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1250e7e72b3dSBarry Smith   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1251e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1252e7e72b3dSBarry Smith   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1253ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1254f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1255ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1256f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
12573a40ed3dSBarry Smith   PetscFunctionReturn(0);
1258cee3aa6bSSatish Balay }
1259cee3aa6bSSatish Balay 
1260dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1261cee3aa6bSSatish Balay {
1262cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1263dfbe8321SBarry Smith   PetscErrorCode ierr;
1264d64ed03dSBarry Smith 
1265d64ed03dSBarry Smith   PetscFunctionBegin;
1266ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1268ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
12703a40ed3dSBarry Smith   PetscFunctionReturn(0);
1271cee3aa6bSSatish Balay }
1272cee3aa6bSSatish Balay 
1273dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1274cee3aa6bSSatish Balay {
1275cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1276dfbe8321SBarry Smith   PetscErrorCode ierr;
1277cee3aa6bSSatish Balay 
1278d64ed03dSBarry Smith   PetscFunctionBegin;
1279cee3aa6bSSatish Balay   /* do nondiagonal part */
12807c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1281cee3aa6bSSatish Balay   /* do local part */
12827c922b88SBarry Smith   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1283e4a140f6SJunchao Zhang   /* add partial results together */
1284ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1285ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
12863a40ed3dSBarry Smith   PetscFunctionReturn(0);
1287cee3aa6bSSatish Balay }
1288cee3aa6bSSatish Balay 
1289dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1290cee3aa6bSSatish Balay {
1291cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1292dfbe8321SBarry Smith   PetscErrorCode ierr;
1293cee3aa6bSSatish Balay 
1294d64ed03dSBarry Smith   PetscFunctionBegin;
1295cee3aa6bSSatish Balay   /* do nondiagonal part */
12967c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1297cee3aa6bSSatish Balay   /* do local part */
12987c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1299e4a140f6SJunchao Zhang   /* add partial results together */
1300e4a140f6SJunchao Zhang   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1301ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
13023a40ed3dSBarry Smith   PetscFunctionReturn(0);
1303cee3aa6bSSatish Balay }
1304cee3aa6bSSatish Balay 
1305cee3aa6bSSatish Balay /*
1306cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1307cee3aa6bSSatish Balay    diagonal block
1308cee3aa6bSSatish Balay */
1309dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1310cee3aa6bSSatish Balay {
1311cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1312dfbe8321SBarry Smith   PetscErrorCode ierr;
1313d64ed03dSBarry Smith 
1314d64ed03dSBarry Smith   PetscFunctionBegin;
1315e32f2f54SBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
13163a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
13173a40ed3dSBarry Smith   PetscFunctionReturn(0);
1318cee3aa6bSSatish Balay }
1319cee3aa6bSSatish Balay 
1320f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1321cee3aa6bSSatish Balay {
1322cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1323dfbe8321SBarry Smith   PetscErrorCode ierr;
1324d64ed03dSBarry Smith 
1325d64ed03dSBarry Smith   PetscFunctionBegin;
1326f4df32b1SMatthew Knepley   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1327f4df32b1SMatthew Knepley   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
13283a40ed3dSBarry Smith   PetscFunctionReturn(0);
1329cee3aa6bSSatish Balay }
1330026e39d0SSatish Balay 
/*
   MatGetRow_MPIBAIJ - returns one locally owned point row, merging the entries
   of the sub-matrices A and B into a single list with global column numbers.

   Input Parameters:
+  matin - the matrix
-  row   - global row number; must satisfy rstart <= row < rend of matin->rmap,
           otherwise PETSC_ERR_SUP is returned

   Output Parameters:
+  nz  - number of entries in the row (always written)
.  idx - if non-NULL, set to the global column indices (NULL when the row is empty)
-  v   - if non-NULL, set to the values (NULL when the row is empty)

   Notes:
   *idx and *v point into mat->rowindices and mat->rowvalues, which are owned by
   the matrix and allocated here on first use.

   The routine sets mat->getrowactive and fails with PETSC_ERR_ARG_WRONGSTATE
   if it is already set; MatRestoreRow_MPIBAIJ() clears the flag again.
*/
1331b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1332acdf5bf4SSatish Balay {
1333acdf5bf4SSatish Balay   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
133487828ca2SBarry Smith   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
13356849ba73SBarry Smith   PetscErrorCode ierr;
1336d0f46423SBarry Smith   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1337d0f46423SBarry Smith   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1338899cda47SBarry Smith   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1339acdf5bf4SSatish Balay 
1340d64ed03dSBarry Smith   PetscFunctionBegin;
1341e7e72b3dSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1342e32f2f54SBarry Smith   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1343acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1344acdf5bf4SSatish Balay 
  /* the work arrays are only needed when the caller asks for indices or values */
1345acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1346acdf5bf4SSatish Balay     /*
1347acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1348acdf5bf4SSatish Balay     */
1349acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    /* max counts blocks (A plus B) in the longest block row and is at least 1 */
1350b24ad042SBarry Smith     PetscInt    max = 1,mbs = mat->mbs,tmp;
1351bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1352acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
135326fbe8dcSKarl Rupp       if (max < tmp) max = tmp;
1354acdf5bf4SSatish Balay     }
1355dcca6d9dSJed Brown     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1356acdf5bf4SSatish Balay   }
  /* row number relative to the first locally owned row */
1357d9d09a02SSatish Balay   lrow = row - brstart;
1358acdf5bf4SSatish Balay 
  /* a NULL pointer tells the sub-matrix getrow that the corresponding output is not wanted */
1359acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1360f4259b30SLisandro Dalcin   if (!v)   {pvA = NULL; pvB = NULL;}
  /* B's columns are still requested when only values are wanted: cworkB decides where A's values are spliced in */
1361f4259b30SLisandro Dalcin   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1362f830108cSBarry Smith   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1363f830108cSBarry Smith   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1364acdf5bf4SSatish Balay   nztot = nzA + nzB;
1365acdf5bf4SSatish Balay 
  /* cmap[local block column of B] is used below as the global block column */
1366acdf5bf4SSatish Balay   cmap = mat->garray;
1367acdf5bf4SSatish Balay   if (v  || idx) {
1368acdf5bf4SSatish Balay     if (nztot) {
1369acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries placed before A's entries; -1 until it has been computed */
1370b24ad042SBarry Smith       PetscInt imark = -1;
1371acdf5bf4SSatish Balay       if (v) {
1372acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
        /* leading B entries: those whose global block column is below cstart */
1373acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1374d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1375acdf5bf4SSatish Balay           else break;
1376acdf5bf4SSatish Balay         }
1377acdf5bf4SSatish Balay         imark = i;
        /* then all of A, then the rest of B shifted past A's nzA entries */
1378acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1379acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1380acdf5bf4SSatish Balay       }
1381acdf5bf4SSatish Balay       if (idx) {
1382acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
        /* reuse the split point found in the value pass when there was one */
1383acdf5bf4SSatish Balay         if (imark > -1) {
1384acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
            /* global point column = global block column * bs + offset inside the block */
1385bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1386acdf5bf4SSatish Balay           }
1387acdf5bf4SSatish Balay         } else {
1388acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
138926fbe8dcSKarl Rupp             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1390acdf5bf4SSatish Balay             else break;
1391acdf5bf4SSatish Balay           }
1392acdf5bf4SSatish Balay           imark = i;
1393acdf5bf4SSatish Balay         }
        /* A's columns are shifted by cstart*bs, i.e. cstart is in block units */
1394d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1395d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1396acdf5bf4SSatish Balay       }
1397d64ed03dSBarry Smith     } else {
      /* empty row: hand back NULL instead of the work arrays */
1398f4259b30SLisandro Dalcin       if (idx) *idx = NULL;
1399f4259b30SLisandro Dalcin       if (v)   *v   = NULL;
1400d212a18eSSatish Balay     }
1401acdf5bf4SSatish Balay   }
1402acdf5bf4SSatish Balay   *nz  = nztot;
  /* the sub-matrix rows are released here; the merged copy lives in the work arrays */
1403f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1404f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
14053a40ed3dSBarry Smith   PetscFunctionReturn(0);
1406acdf5bf4SSatish Balay }
1407acdf5bf4SSatish Balay 
1408b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1409acdf5bf4SSatish Balay {
1410acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1411d64ed03dSBarry Smith 
1412d64ed03dSBarry Smith   PetscFunctionBegin;
1413e7e72b3dSBarry Smith   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1414acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
14153a40ed3dSBarry Smith   PetscFunctionReturn(0);
1416acdf5bf4SSatish Balay }
1417acdf5bf4SSatish Balay 
1418dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
141958667388SSatish Balay {
142058667388SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1421dfbe8321SBarry Smith   PetscErrorCode ierr;
1422d64ed03dSBarry Smith 
1423d64ed03dSBarry Smith   PetscFunctionBegin;
142458667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
142558667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
14263a40ed3dSBarry Smith   PetscFunctionReturn(0);
142758667388SSatish Balay }
14280ac07820SSatish Balay 
1429dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14300ac07820SSatish Balay {
14314e220ebcSLois Curfman McInnes   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
14324e220ebcSLois Curfman McInnes   Mat            A  = a->A,B = a->B;
1433dfbe8321SBarry Smith   PetscErrorCode ierr;
14343966268fSBarry Smith   PetscLogDouble isend[5],irecv[5];
14350ac07820SSatish Balay 
1436d64ed03dSBarry Smith   PetscFunctionBegin;
1437d0f46423SBarry Smith   info->block_size = (PetscReal)matin->rmap->bs;
143826fbe8dcSKarl Rupp 
14394e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
144026fbe8dcSKarl Rupp 
14410e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1442de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
144326fbe8dcSKarl Rupp 
14444e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
144526fbe8dcSKarl Rupp 
14460e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1447de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
144826fbe8dcSKarl Rupp 
14490ac07820SSatish Balay   if (flag == MAT_LOCAL) {
14504e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
14514e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
14524e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
14534e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14544e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14550ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1456820f2d46SBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
145726fbe8dcSKarl Rupp 
14584e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14594e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14604e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14614e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14624e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14630ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1464820f2d46SBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
146526fbe8dcSKarl Rupp 
14664e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14674e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14684e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14694e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14704e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1471ce94432eSBarry Smith   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
14724e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
14734e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
14744e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
14753a40ed3dSBarry Smith   PetscFunctionReturn(0);
14760ac07820SSatish Balay }
14770ac07820SSatish Balay 
1478ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
147958667388SSatish Balay {
148058667388SSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1481dfbe8321SBarry Smith   PetscErrorCode ierr;
148258667388SSatish Balay 
1483d64ed03dSBarry Smith   PetscFunctionBegin;
148412c028f9SKris Buschelman   switch (op) {
1485512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
148612c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
148728b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1488a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
148912c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
149043674050SBarry Smith     MatCheckPreallocated(A,1);
14914e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
14924e0d8c25SBarry Smith     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
149312c028f9SKris Buschelman     break;
149412c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
149543674050SBarry Smith     MatCheckPreallocated(A,1);
14964e0d8c25SBarry Smith     a->roworiented = flg;
149726fbe8dcSKarl Rupp 
14984e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
14994e0d8c25SBarry Smith     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
150012c028f9SKris Buschelman     break;
15018c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1502071fcb05SBarry Smith   case MAT_SORTED_FULL:
1503290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
150412c028f9SKris Buschelman     break;
150512c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
15064e0d8c25SBarry Smith     a->donotstash = flg;
150712c028f9SKris Buschelman     break;
150812c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
15094e0d8c25SBarry Smith     a->ht_flag = flg;
1510abf3b562SBarry Smith     a->ht_fact = 1.39;
151112c028f9SKris Buschelman     break;
151277e54ba9SKris Buschelman   case MAT_SYMMETRIC:
151377e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
15142188ac68SBarry Smith   case MAT_HERMITIAN:
1515c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
15162188ac68SBarry Smith   case MAT_SYMMETRY_ETERNAL:
151743674050SBarry Smith     MatCheckPreallocated(A,1);
15184e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
151977e54ba9SKris Buschelman     break;
152012c028f9SKris Buschelman   default:
1521ce94432eSBarry Smith     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1522d64ed03dSBarry Smith   }
15233a40ed3dSBarry Smith   PetscFunctionReturn(0);
152458667388SSatish Balay }
152558667388SSatish Balay 
1526fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
15270ac07820SSatish Balay {
15280ac07820SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
15290ac07820SSatish Balay   Mat_SeqBAIJ    *Aloc;
15300ac07820SSatish Balay   Mat            B;
1531dfbe8321SBarry Smith   PetscErrorCode ierr;
1532d0f46423SBarry Smith   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1533d0f46423SBarry Smith   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
15343eda8832SBarry Smith   MatScalar      *a;
15350ac07820SSatish Balay 
1536d64ed03dSBarry Smith   PetscFunctionBegin;
1537cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1538ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1539d0f46423SBarry Smith     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
15407adad957SLisandro Dalcin     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
15412e72b8d9SBarry Smith     /* Do not know preallocation information, but must set block size */
15420298fd71SBarry Smith     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1543fc4dec0aSBarry Smith   } else {
1544fc4dec0aSBarry Smith     B = *matout;
1545fc4dec0aSBarry Smith   }
15460ac07820SSatish Balay 
15470ac07820SSatish Balay   /* copy over the A part */
15480ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
15490ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1550785e854fSJed Brown   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
15510ac07820SSatish Balay 
15520ac07820SSatish Balay   for (i=0; i<mbs; i++) {
1553899cda47SBarry Smith     rvals[0] = bs*(baij->rstartbs + i);
155426fbe8dcSKarl Rupp     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
15550ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1556899cda47SBarry Smith       col = (baij->cstartbs+aj[j])*bs;
15570ac07820SSatish Balay       for (k=0; k<bs; k++) {
155897e5c40aSBarry Smith         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
155926fbe8dcSKarl Rupp 
15600ac07820SSatish Balay         col++; a += bs;
15610ac07820SSatish Balay       }
15620ac07820SSatish Balay     }
15630ac07820SSatish Balay   }
15640ac07820SSatish Balay   /* copy over the B part */
15650ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
15660ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
15670ac07820SSatish Balay   for (i=0; i<mbs; i++) {
1568899cda47SBarry Smith     rvals[0] = bs*(baij->rstartbs + i);
156926fbe8dcSKarl Rupp     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
15700ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
15710ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
15720ac07820SSatish Balay       for (k=0; k<bs; k++) {
157397e5c40aSBarry Smith         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
157426fbe8dcSKarl Rupp         col++;
157526fbe8dcSKarl Rupp         a += bs;
15760ac07820SSatish Balay       }
15770ac07820SSatish Balay     }
15780ac07820SSatish Balay   }
1579606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
15800ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15810ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15820ac07820SSatish Balay 
1583cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
158426fbe8dcSKarl Rupp   else {
158528be2f97SBarry Smith     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
15860ac07820SSatish Balay   }
15873a40ed3dSBarry Smith   PetscFunctionReturn(0);
15880ac07820SSatish Balay }
15890e95ebc0SSatish Balay 
1590dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
15910e95ebc0SSatish Balay {
159236c4a09eSSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
159336c4a09eSSatish Balay   Mat            a     = baij->A,b = baij->B;
1594dfbe8321SBarry Smith   PetscErrorCode ierr;
1595b24ad042SBarry Smith   PetscInt       s1,s2,s3;
15960e95ebc0SSatish Balay 
1597d64ed03dSBarry Smith   PetscFunctionBegin;
159836c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
159936c4a09eSSatish Balay   if (rr) {
160036c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1601e32f2f54SBarry Smith     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
160236c4a09eSSatish Balay     /* Overlap communication with computation. */
1603ca9f406cSSatish Balay     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
160436c4a09eSSatish Balay   }
16050e95ebc0SSatish Balay   if (ll) {
16060e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1607e32f2f54SBarry Smith     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
16080298fd71SBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
16090e95ebc0SSatish Balay   }
161036c4a09eSSatish Balay   /* scale  the diagonal block */
161136c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
161236c4a09eSSatish Balay 
161336c4a09eSSatish Balay   if (rr) {
161436c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
1615ca9f406cSSatish Balay     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
16160298fd71SBarry Smith     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
161736c4a09eSSatish Balay   }
16183a40ed3dSBarry Smith   PetscFunctionReturn(0);
16190e95ebc0SSatish Balay }
16200e95ebc0SSatish Balay 
16212b40b63fSBarry Smith PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
16220ac07820SSatish Balay {
16230ac07820SSatish Balay   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
162465a92638SMatthew G. Knepley   PetscInt      *lrows;
16256e520ac8SStefano Zampini   PetscInt       r, len;
162694342113SStefano Zampini   PetscBool      cong;
16276849ba73SBarry Smith   PetscErrorCode ierr;
16280ac07820SSatish Balay 
1629d64ed03dSBarry Smith   PetscFunctionBegin;
16306e520ac8SStefano Zampini   /* get locally owned rows */
16316e520ac8SStefano Zampini   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
163297b48c8fSBarry Smith   /* fix right hand side if needed */
163397b48c8fSBarry Smith   if (x && b) {
163465a92638SMatthew G. Knepley     const PetscScalar *xx;
163565a92638SMatthew G. Knepley     PetscScalar       *bb;
163665a92638SMatthew G. Knepley 
163797b48c8fSBarry Smith     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
163897b48c8fSBarry Smith     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
163965a92638SMatthew G. Knepley     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
164097b48c8fSBarry Smith     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
164197b48c8fSBarry Smith     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
164297b48c8fSBarry Smith   }
164397b48c8fSBarry Smith 
16440ac07820SSatish Balay   /* actually zap the local rows */
164572dacd9aSBarry Smith   /*
164672dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
1647a8c7a070SBarry Smith      is square and the user wishes to set the diagonal we use separate
164872dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
164972dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
165072dacd9aSBarry Smith 
165172dacd9aSBarry Smith   */
16529c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1653a34163a4SJed Brown   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
165494342113SStefano Zampini   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
165594342113SStefano Zampini   if ((diag != 0.0) && cong) {
1656a34163a4SJed Brown     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1657f4df32b1SMatthew Knepley   } else if (diag != 0.0) {
1658f4259b30SLisandro Dalcin     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1659e7e72b3dSBarry Smith     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1660512a5fc5SBarry Smith        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
166165a92638SMatthew G. Knepley     for (r = 0; r < len; ++r) {
166265a92638SMatthew G. Knepley       const PetscInt row = lrows[r] + A->rmap->rstart;
1663f4df32b1SMatthew Knepley       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1664a07cd24cSSatish Balay     }
1665a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1666a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16679c957beeSSatish Balay   } else {
1668a34163a4SJed Brown     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1669a07cd24cSSatish Balay   }
1670606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
16714f9cfa9eSBarry Smith 
16724f9cfa9eSBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
16734f9cfa9eSBarry Smith   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1674e56f5c9eSBarry Smith     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1675820f2d46SBarry Smith     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
1676e56f5c9eSBarry Smith   }
16773a40ed3dSBarry Smith   PetscFunctionReturn(0);
16780ac07820SSatish Balay }
167972dacd9aSBarry Smith 
16806f0a72daSMatthew G. Knepley PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
16816f0a72daSMatthew G. Knepley {
16826f0a72daSMatthew G. Knepley   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
16836f0a72daSMatthew G. Knepley   PetscErrorCode    ierr;
1684131c27b5Sprj-   PetscMPIInt       n = A->rmap->n,p = 0;
1685131c27b5Sprj-   PetscInt          i,j,k,r,len = 0,row,col,count;
16866f0a72daSMatthew G. Knepley   PetscInt          *lrows,*owners = A->rmap->range;
16876f0a72daSMatthew G. Knepley   PetscSFNode       *rrows;
16886f0a72daSMatthew G. Knepley   PetscSF           sf;
16896f0a72daSMatthew G. Knepley   const PetscScalar *xx;
16906f0a72daSMatthew G. Knepley   PetscScalar       *bb,*mask;
16916f0a72daSMatthew G. Knepley   Vec               xmask,lmask;
16926f0a72daSMatthew G. Knepley   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
16936f0a72daSMatthew G. Knepley   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
16946f0a72daSMatthew G. Knepley   PetscScalar       *aa;
16956f0a72daSMatthew G. Knepley 
16966f0a72daSMatthew G. Knepley   PetscFunctionBegin;
16976f0a72daSMatthew G. Knepley   /* Create SF where leaves are input rows and roots are owned rows */
16986f0a72daSMatthew G. Knepley   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
16996f0a72daSMatthew G. Knepley   for (r = 0; r < n; ++r) lrows[r] = -1;
17006f0a72daSMatthew G. Knepley   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
17016f0a72daSMatthew G. Knepley   for (r = 0; r < N; ++r) {
17026f0a72daSMatthew G. Knepley     const PetscInt idx   = rows[r];
17035ba17502SJed Brown     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
17045ba17502SJed Brown     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
17055ba17502SJed Brown       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
17065ba17502SJed Brown     }
17076f0a72daSMatthew G. Knepley     rrows[r].rank  = p;
17086f0a72daSMatthew G. Knepley     rrows[r].index = rows[r] - owners[p];
17096f0a72daSMatthew G. Knepley   }
17106f0a72daSMatthew G. Knepley   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
17116f0a72daSMatthew G. Knepley   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
17126f0a72daSMatthew G. Knepley   /* Collect flags for rows to be zeroed */
17136f0a72daSMatthew G. Knepley   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
17146f0a72daSMatthew G. Knepley   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
17156f0a72daSMatthew G. Knepley   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
17166f0a72daSMatthew G. Knepley   /* Compress and put in row numbers */
17176f0a72daSMatthew G. Knepley   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
17186f0a72daSMatthew G. Knepley   /* zero diagonal part of matrix */
17196f0a72daSMatthew G. Knepley   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
17206f0a72daSMatthew G. Knepley   /* handle off diagonal part of matrix */
17212a7a6963SBarry Smith   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
17226f0a72daSMatthew G. Knepley   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
17236f0a72daSMatthew G. Knepley   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
17246f0a72daSMatthew G. Knepley   for (i=0; i<len; i++) bb[lrows[i]] = 1;
17256f0a72daSMatthew G. Knepley   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
17266f0a72daSMatthew G. Knepley   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
17276f0a72daSMatthew G. Knepley   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
17286f0a72daSMatthew G. Knepley   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
17296f0a72daSMatthew G. Knepley   if (x) {
17306f0a72daSMatthew G. Knepley     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
17316f0a72daSMatthew G. Knepley     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
17326f0a72daSMatthew G. Knepley     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
17336f0a72daSMatthew G. Knepley     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
17346f0a72daSMatthew G. Knepley   }
17356f0a72daSMatthew G. Knepley   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
17366f0a72daSMatthew G. Knepley   /* remove zeroed rows of off diagonal matrix */
17376f0a72daSMatthew G. Knepley   for (i = 0; i < len; ++i) {
17386f0a72daSMatthew G. Knepley     row   = lrows[i];
17396f0a72daSMatthew G. Knepley     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
17406f0a72daSMatthew G. Knepley     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
17416f0a72daSMatthew G. Knepley     for (k = 0; k < count; ++k) {
17426f0a72daSMatthew G. Knepley       aa[0] = 0.0;
17436f0a72daSMatthew G. Knepley       aa   += bs;
17446f0a72daSMatthew G. Knepley     }
17456f0a72daSMatthew G. Knepley   }
17466f0a72daSMatthew G. Knepley   /* loop over all elements of off process part of matrix zeroing removed columns*/
17476f0a72daSMatthew G. Knepley   for (i = 0; i < l->B->rmap->N; ++i) {
17486f0a72daSMatthew G. Knepley     row = i/bs;
17496f0a72daSMatthew G. Knepley     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
17506f0a72daSMatthew G. Knepley       for (k = 0; k < bs; ++k) {
17516f0a72daSMatthew G. Knepley         col = bs*baij->j[j] + k;
17526f0a72daSMatthew G. Knepley         if (PetscAbsScalar(mask[col])) {
17536f0a72daSMatthew G. Knepley           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
175489ae1891SBarry Smith           if (x) bb[i] -= aa[0]*xx[col];
17556f0a72daSMatthew G. Knepley           aa[0] = 0.0;
17566f0a72daSMatthew G. Knepley         }
17576f0a72daSMatthew G. Knepley       }
17586f0a72daSMatthew G. Knepley     }
17596f0a72daSMatthew G. Knepley   }
17606f0a72daSMatthew G. Knepley   if (x) {
17616f0a72daSMatthew G. Knepley     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
17626f0a72daSMatthew G. Knepley     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
17636f0a72daSMatthew G. Knepley   }
17646f0a72daSMatthew G. Knepley   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
17656f0a72daSMatthew G. Knepley   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
17666f0a72daSMatthew G. Knepley   ierr = PetscFree(lrows);CHKERRQ(ierr);
17674f9cfa9eSBarry Smith 
17684f9cfa9eSBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
17694f9cfa9eSBarry Smith   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
17704f9cfa9eSBarry Smith     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1771820f2d46SBarry Smith     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
17724f9cfa9eSBarry Smith   }
17736f0a72daSMatthew G. Knepley   PetscFunctionReturn(0);
17746f0a72daSMatthew G. Knepley }
17756f0a72daSMatthew G. Knepley 
1776dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1777bb5a7306SBarry Smith {
1778bb5a7306SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1779dfbe8321SBarry Smith   PetscErrorCode ierr;
1780d64ed03dSBarry Smith 
1781d64ed03dSBarry Smith   PetscFunctionBegin;
1782bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
17833a40ed3dSBarry Smith   PetscFunctionReturn(0);
1784bb5a7306SBarry Smith }
1785bb5a7306SBarry Smith 
17866849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
17870ac07820SSatish Balay 
1788ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
17897fc3c18eSBarry Smith {
17907fc3c18eSBarry Smith   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
17917fc3c18eSBarry Smith   Mat            a,b,c,d;
1792ace3abfcSBarry Smith   PetscBool      flg;
1793dfbe8321SBarry Smith   PetscErrorCode ierr;
17947fc3c18eSBarry Smith 
17957fc3c18eSBarry Smith   PetscFunctionBegin;
17967fc3c18eSBarry Smith   a = matA->A; b = matA->B;
17977fc3c18eSBarry Smith   c = matB->A; d = matB->B;
17987fc3c18eSBarry Smith 
17997fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1800abc0a331SBarry Smith   if (flg) {
18017fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
18027fc3c18eSBarry Smith   }
1803820f2d46SBarry Smith   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
18047fc3c18eSBarry Smith   PetscFunctionReturn(0);
18057fc3c18eSBarry Smith }
18067fc3c18eSBarry Smith 
18073c896bc6SHong Zhang PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
18083c896bc6SHong Zhang {
18093c896bc6SHong Zhang   PetscErrorCode ierr;
18103c896bc6SHong Zhang   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
18113c896bc6SHong Zhang   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
18123c896bc6SHong Zhang 
18133c896bc6SHong Zhang   PetscFunctionBegin;
18143c896bc6SHong Zhang   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
18153c896bc6SHong Zhang   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
18163c896bc6SHong Zhang     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
18173c896bc6SHong Zhang   } else {
18183c896bc6SHong Zhang     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
18193c896bc6SHong Zhang     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
18203c896bc6SHong Zhang   }
1821cdc753b6SBarry Smith   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
18223c896bc6SHong Zhang   PetscFunctionReturn(0);
18233c896bc6SHong Zhang }
1824273d9f13SBarry Smith 
18254994cf47SJed Brown PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1826273d9f13SBarry Smith {
1827dfbe8321SBarry Smith   PetscErrorCode ierr;
1828273d9f13SBarry Smith 
1829273d9f13SBarry Smith   PetscFunctionBegin;
1830f4259b30SLisandro Dalcin   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
1831273d9f13SBarry Smith   PetscFunctionReturn(0);
1832273d9f13SBarry Smith }
1833273d9f13SBarry Smith 
18344de5dceeSHong Zhang PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
18354de5dceeSHong Zhang {
1836001ddc4fSHong Zhang   PetscErrorCode ierr;
1837001ddc4fSHong Zhang   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
18384de5dceeSHong Zhang   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
18394de5dceeSHong Zhang   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
18404de5dceeSHong Zhang 
18414de5dceeSHong Zhang   PetscFunctionBegin;
1842001ddc4fSHong Zhang   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
18434de5dceeSHong Zhang   PetscFunctionReturn(0);
18444de5dceeSHong Zhang }
18454de5dceeSHong Zhang 
18464fe895cdSHong Zhang PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
18474fe895cdSHong Zhang {
18484fe895cdSHong Zhang   PetscErrorCode ierr;
18494fe895cdSHong Zhang   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
18504fe895cdSHong Zhang   PetscBLASInt   bnz,one=1;
18514fe895cdSHong Zhang   Mat_SeqBAIJ    *x,*y;
1852b31f67cfSBarry Smith   PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;
18534fe895cdSHong Zhang 
18544fe895cdSHong Zhang   PetscFunctionBegin;
18554fe895cdSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
18564fe895cdSHong Zhang     PetscScalar alpha = a;
18574fe895cdSHong Zhang     x    = (Mat_SeqBAIJ*)xx->A->data;
18584fe895cdSHong Zhang     y    = (Mat_SeqBAIJ*)yy->A->data;
1859b31f67cfSBarry Smith     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
18608b83055fSJed Brown     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
18614fe895cdSHong Zhang     x    = (Mat_SeqBAIJ*)xx->B->data;
18624fe895cdSHong Zhang     y    = (Mat_SeqBAIJ*)yy->B->data;
1863b31f67cfSBarry Smith     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
18648b83055fSJed Brown     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1865a3fa217bSJose E. Roman     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1866ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1867ab784542SHong Zhang     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
18684fe895cdSHong Zhang   } else {
18694de5dceeSHong Zhang     Mat      B;
18704de5dceeSHong Zhang     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
18714de5dceeSHong Zhang     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
18724de5dceeSHong Zhang     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
18734de5dceeSHong Zhang     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
18744de5dceeSHong Zhang     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
18754de5dceeSHong Zhang     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
18764de5dceeSHong Zhang     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
18774de5dceeSHong Zhang     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
18784de5dceeSHong Zhang     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
18794de5dceeSHong Zhang     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
18804de5dceeSHong Zhang     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
18814de5dceeSHong Zhang     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
18824de5dceeSHong Zhang     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
188328be2f97SBarry Smith     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
18844de5dceeSHong Zhang     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
18854de5dceeSHong Zhang     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
18864fe895cdSHong Zhang   }
18874fe895cdSHong Zhang   PetscFunctionReturn(0);
18884fe895cdSHong Zhang }
18894fe895cdSHong Zhang 
18902726fb6dSPierre Jolivet PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
18912726fb6dSPierre Jolivet {
18922726fb6dSPierre Jolivet #if defined(PETSC_USE_COMPLEX)
18932726fb6dSPierre Jolivet   PetscErrorCode ierr;
18942726fb6dSPierre Jolivet   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
18952726fb6dSPierre Jolivet 
18962726fb6dSPierre Jolivet   PetscFunctionBegin;
18972726fb6dSPierre Jolivet   ierr = MatConjugate_SeqBAIJ(a->A);CHKERRQ(ierr);
18982726fb6dSPierre Jolivet   ierr = MatConjugate_SeqBAIJ(a->B);CHKERRQ(ierr);
18992726fb6dSPierre Jolivet #else
19002726fb6dSPierre Jolivet   PetscFunctionBegin;
19012726fb6dSPierre Jolivet #endif
19022726fb6dSPierre Jolivet   PetscFunctionReturn(0);
19032726fb6dSPierre Jolivet }
19042726fb6dSPierre Jolivet 
190599cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
190699cafbc1SBarry Smith {
190799cafbc1SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
190899cafbc1SBarry Smith   PetscErrorCode ierr;
190999cafbc1SBarry Smith 
191099cafbc1SBarry Smith   PetscFunctionBegin;
191199cafbc1SBarry Smith   ierr = MatRealPart(a->A);CHKERRQ(ierr);
191299cafbc1SBarry Smith   ierr = MatRealPart(a->B);CHKERRQ(ierr);
191399cafbc1SBarry Smith   PetscFunctionReturn(0);
191499cafbc1SBarry Smith }
191599cafbc1SBarry Smith 
191699cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
191799cafbc1SBarry Smith {
191899cafbc1SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
191999cafbc1SBarry Smith   PetscErrorCode ierr;
192099cafbc1SBarry Smith 
192199cafbc1SBarry Smith   PetscFunctionBegin;
192299cafbc1SBarry Smith   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
192399cafbc1SBarry Smith   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
192499cafbc1SBarry Smith   PetscFunctionReturn(0);
192599cafbc1SBarry Smith }
192699cafbc1SBarry Smith 
19277dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
19284aa3045dSJed Brown {
19294aa3045dSJed Brown   PetscErrorCode ierr;
19304aa3045dSJed Brown   IS             iscol_local;
19314aa3045dSJed Brown   PetscInt       csize;
19324aa3045dSJed Brown 
19334aa3045dSJed Brown   PetscFunctionBegin;
19344aa3045dSJed Brown   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1935b79d0421SJed Brown   if (call == MAT_REUSE_MATRIX) {
1936b79d0421SJed Brown     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1937e32f2f54SBarry Smith     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1938b79d0421SJed Brown   } else {
19394aa3045dSJed Brown     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1940b79d0421SJed Brown   }
19417dae84e0SHong Zhang   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1942b79d0421SJed Brown   if (call == MAT_INITIAL_MATRIX) {
1943b79d0421SJed Brown     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
19446bf464f9SBarry Smith     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1945b79d0421SJed Brown   }
19464aa3045dSJed Brown   PetscFunctionReturn(0);
19474aa3045dSJed Brown }
194817df9f7cSHong Zhang 
194982094794SBarry Smith /*
195082094794SBarry Smith   Not great since it makes two copies of the submatrix, first an SeqBAIJ
195182094794SBarry Smith   in local and then by concatenating the local matrices the end result.
19527dae84e0SHong Zhang   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
19538f46ffcaSHong Zhang   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
195482094794SBarry Smith */
19557dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
195682094794SBarry Smith {
195782094794SBarry Smith   PetscErrorCode ierr;
195882094794SBarry Smith   PetscMPIInt    rank,size;
195982094794SBarry Smith   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1960c9ffca76SHong Zhang   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
196129dcf524SDmitry Karpeev   Mat            M,Mreuse;
196282094794SBarry Smith   MatScalar      *vwork,*aa;
1963ce94432eSBarry Smith   MPI_Comm       comm;
196429dcf524SDmitry Karpeev   IS             isrow_new, iscol_new;
196582094794SBarry Smith   Mat_SeqBAIJ    *aij;
196682094794SBarry Smith 
196782094794SBarry Smith   PetscFunctionBegin;
1968ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
1969ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
1970ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
197129dcf524SDmitry Karpeev   /* The compression and expansion should be avoided. Doesn't point
197229dcf524SDmitry Karpeev      out errors, might change the indices, hence buggey */
197329dcf524SDmitry Karpeev   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
197429dcf524SDmitry Karpeev   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
197582094794SBarry Smith 
197682094794SBarry Smith   if (call ==  MAT_REUSE_MATRIX) {
197782094794SBarry Smith     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
1978e32f2f54SBarry Smith     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
19797dae84e0SHong Zhang     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
198082094794SBarry Smith   } else {
19817dae84e0SHong Zhang     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
198282094794SBarry Smith   }
198329dcf524SDmitry Karpeev   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
198429dcf524SDmitry Karpeev   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
198582094794SBarry Smith   /*
198682094794SBarry Smith       m - number of local rows
198782094794SBarry Smith       n - number of columns (same on all processors)
198882094794SBarry Smith       rstart - first row in new global matrix generated
198982094794SBarry Smith   */
199082094794SBarry Smith   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
199182094794SBarry Smith   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
199282094794SBarry Smith   m    = m/bs;
199382094794SBarry Smith   n    = n/bs;
199482094794SBarry Smith 
199582094794SBarry Smith   if (call == MAT_INITIAL_MATRIX) {
199682094794SBarry Smith     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
199782094794SBarry Smith     ii  = aij->i;
199882094794SBarry Smith     jj  = aij->j;
199982094794SBarry Smith 
200082094794SBarry Smith     /*
200182094794SBarry Smith         Determine the number of non-zeros in the diagonal and off-diagonal
200282094794SBarry Smith         portions of the matrix in order to do correct preallocation
200382094794SBarry Smith     */
200482094794SBarry Smith 
200582094794SBarry Smith     /* first get start and end of "diagonal" columns */
200682094794SBarry Smith     if (csize == PETSC_DECIDE) {
200782094794SBarry Smith       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
200882094794SBarry Smith       if (mglobal == n*bs) { /* square matrix */
200982094794SBarry Smith         nlocal = m;
201082094794SBarry Smith       } else {
201182094794SBarry Smith         nlocal = n/size + ((n % size) > rank);
201282094794SBarry Smith       }
201382094794SBarry Smith     } else {
201482094794SBarry Smith       nlocal = csize/bs;
201582094794SBarry Smith     }
201655b25c41SPierre Jolivet     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
201782094794SBarry Smith     rstart = rend - nlocal;
201865e19b50SBarry Smith     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
201982094794SBarry Smith 
202082094794SBarry Smith     /* next, compute all the lengths */
2021dcca6d9dSJed Brown     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
202282094794SBarry Smith     for (i=0; i<m; i++) {
202382094794SBarry Smith       jend = ii[i+1] - ii[i];
202482094794SBarry Smith       olen = 0;
202582094794SBarry Smith       dlen = 0;
202682094794SBarry Smith       for (j=0; j<jend; j++) {
202782094794SBarry Smith         if (*jj < rstart || *jj >= rend) olen++;
202882094794SBarry Smith         else dlen++;
202982094794SBarry Smith         jj++;
203082094794SBarry Smith       }
203182094794SBarry Smith       olens[i] = olen;
203282094794SBarry Smith       dlens[i] = dlen;
203382094794SBarry Smith     }
203482094794SBarry Smith     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
203582094794SBarry Smith     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
203682094794SBarry Smith     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
203782094794SBarry Smith     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
20388f46ffcaSHong Zhang     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2039eb9baa12SBarry Smith     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
204082094794SBarry Smith   } else {
204182094794SBarry Smith     PetscInt ml,nl;
204282094794SBarry Smith 
204382094794SBarry Smith     M    = *newmat;
204482094794SBarry Smith     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2045e32f2f54SBarry Smith     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
204682094794SBarry Smith     ierr = MatZeroEntries(M);CHKERRQ(ierr);
204782094794SBarry Smith     /*
204882094794SBarry Smith          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
204982094794SBarry Smith        rather than the slower MatSetValues().
205082094794SBarry Smith     */
205182094794SBarry Smith     M->was_assembled = PETSC_TRUE;
205282094794SBarry Smith     M->assembled     = PETSC_FALSE;
205382094794SBarry Smith   }
205482094794SBarry Smith   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
205582094794SBarry Smith   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
205682094794SBarry Smith   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
205782094794SBarry Smith   ii   = aij->i;
205882094794SBarry Smith   jj   = aij->j;
205982094794SBarry Smith   aa   = aij->a;
206082094794SBarry Smith   for (i=0; i<m; i++) {
206182094794SBarry Smith     row   = rstart/bs + i;
206282094794SBarry Smith     nz    = ii[i+1] - ii[i];
206382094794SBarry Smith     cwork = jj;     jj += nz;
206475f6568bSJed Brown     vwork = aa;     aa += nz*bs*bs;
206582094794SBarry Smith     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
206682094794SBarry Smith   }
206782094794SBarry Smith 
206882094794SBarry Smith   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
206982094794SBarry Smith   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
207082094794SBarry Smith   *newmat = M;
207182094794SBarry Smith 
207282094794SBarry Smith   /* save submatrix used in processor for next request */
207382094794SBarry Smith   if (call ==  MAT_INITIAL_MATRIX) {
207482094794SBarry Smith     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
207582094794SBarry Smith     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
207682094794SBarry Smith   }
207782094794SBarry Smith   PetscFunctionReturn(0);
207882094794SBarry Smith }
207982094794SBarry Smith 
208082094794SBarry Smith PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
208182094794SBarry Smith {
208282094794SBarry Smith   MPI_Comm       comm,pcomm;
2083a0a83eb5SRémi Lacroix   PetscInt       clocal_size,nrows;
208482094794SBarry Smith   const PetscInt *rows;
2085dbf0e21dSBarry Smith   PetscMPIInt    size;
2086a0a83eb5SRémi Lacroix   IS             crowp,lcolp;
208782094794SBarry Smith   PetscErrorCode ierr;
208882094794SBarry Smith 
208982094794SBarry Smith   PetscFunctionBegin;
209082094794SBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
209182094794SBarry Smith   /* make a collective version of 'rowp' */
209282094794SBarry Smith   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
209382094794SBarry Smith   if (pcomm==comm) {
209482094794SBarry Smith     crowp = rowp;
209582094794SBarry Smith   } else {
209682094794SBarry Smith     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
209782094794SBarry Smith     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
209870b3c8c7SBarry Smith     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
209982094794SBarry Smith     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
210082094794SBarry Smith   }
2101a0a83eb5SRémi Lacroix   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2102a0a83eb5SRémi Lacroix   /* make a local version of 'colp' */
210382094794SBarry Smith   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2104ffc4695bSBarry Smith   ierr = MPI_Comm_size(pcomm,&size);CHKERRMPI(ierr);
2105dbf0e21dSBarry Smith   if (size==1) {
210682094794SBarry Smith     lcolp = colp;
210782094794SBarry Smith   } else {
210875f6568bSJed Brown     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
210982094794SBarry Smith   }
2110dbf0e21dSBarry Smith   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
211175f6568bSJed Brown   /* now we just get the submatrix */
21127afc1a8bSJed Brown   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
21137dae84e0SHong Zhang   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2114a0a83eb5SRémi Lacroix   /* clean up */
2115a0a83eb5SRémi Lacroix   if (pcomm!=comm) {
2116a0a83eb5SRémi Lacroix     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2117a0a83eb5SRémi Lacroix   }
2118dbf0e21dSBarry Smith   if (size>1) {
21196bf464f9SBarry Smith     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
212082094794SBarry Smith   }
212182094794SBarry Smith   PetscFunctionReturn(0);
212282094794SBarry Smith }
212382094794SBarry Smith 
21247087cfbeSBarry Smith PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
21258c7482ecSBarry Smith {
21268c7482ecSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
21278c7482ecSBarry Smith   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
21288c7482ecSBarry Smith 
21298c7482ecSBarry Smith   PetscFunctionBegin;
213026fbe8dcSKarl Rupp   if (nghosts) *nghosts = B->nbs;
213126fbe8dcSKarl Rupp   if (ghosts) *ghosts = baij->garray;
21328c7482ecSBarry Smith   PetscFunctionReturn(0);
21338c7482ecSBarry Smith }
21348c7482ecSBarry Smith 
2135d1adec66SJed Brown PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2136f6d58c54SBarry Smith {
2137f6d58c54SBarry Smith   Mat            B;
2138f6d58c54SBarry Smith   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2139f6d58c54SBarry Smith   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2140f6d58c54SBarry Smith   Mat_SeqAIJ     *b;
2141f6d58c54SBarry Smith   PetscErrorCode ierr;
2142f4259b30SLisandro Dalcin   PetscMPIInt    size,rank,*recvcounts = NULL,*displs = NULL;
2143f6d58c54SBarry Smith   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2144f6d58c54SBarry Smith   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2145f6d58c54SBarry Smith 
2146f6d58c54SBarry Smith   PetscFunctionBegin;
2147ffc4695bSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
2148ffc4695bSBarry Smith   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
2149f6d58c54SBarry Smith 
2150f6d58c54SBarry Smith   /* ----------------------------------------------------------------
2151f6d58c54SBarry Smith      Tell every processor the number of nonzeros per row
2152f6d58c54SBarry Smith   */
2153854ce69bSBarry Smith   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2154f6d58c54SBarry Smith   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2155f6d58c54SBarry Smith     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2156f6d58c54SBarry Smith   }
2157785e854fSJed Brown   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2158f6d58c54SBarry Smith   displs    = recvcounts + size;
2159f6d58c54SBarry Smith   for (i=0; i<size; i++) {
2160f6d58c54SBarry Smith     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2161f6d58c54SBarry Smith     displs[i]     = A->rmap->range[i]/bs;
2162f6d58c54SBarry Smith   }
2163f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE)
2164ffc4695bSBarry Smith   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2165f6d58c54SBarry Smith #else
21663d3eaba7SBarry Smith   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2167ffc4695bSBarry Smith   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2168f6d58c54SBarry Smith #endif
2169f6d58c54SBarry Smith   /* ---------------------------------------------------------------
2170f6d58c54SBarry Smith      Create the sequential matrix of the same type as the local block diagonal
2171f6d58c54SBarry Smith   */
2172f6d58c54SBarry Smith   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2173f6d58c54SBarry Smith   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2174f6d58c54SBarry Smith   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2175f6d58c54SBarry Smith   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2176f6d58c54SBarry Smith   b    = (Mat_SeqAIJ*)B->data;
2177f6d58c54SBarry Smith 
2178f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2179f6d58c54SBarry Smith     Copy my part of matrix column indices over
2180f6d58c54SBarry Smith   */
2181f6d58c54SBarry Smith   sendcount  = ad->nz + bd->nz;
2182f6d58c54SBarry Smith   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2183f6d58c54SBarry Smith   a_jsendbuf = ad->j;
2184f6d58c54SBarry Smith   b_jsendbuf = bd->j;
2185f6d58c54SBarry Smith   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2186f6d58c54SBarry Smith   cnt        = 0;
2187f6d58c54SBarry Smith   for (i=0; i<n; i++) {
2188f6d58c54SBarry Smith 
2189f6d58c54SBarry Smith     /* put in lower diagonal portion */
2190f6d58c54SBarry Smith     m = bd->i[i+1] - bd->i[i];
2191f6d58c54SBarry Smith     while (m > 0) {
2192f6d58c54SBarry Smith       /* is it above diagonal (in bd (compressed) numbering) */
2193f6d58c54SBarry Smith       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2194f6d58c54SBarry Smith       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2195f6d58c54SBarry Smith       m--;
2196f6d58c54SBarry Smith     }
2197f6d58c54SBarry Smith 
2198f6d58c54SBarry Smith     /* put in diagonal portion */
2199f6d58c54SBarry Smith     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2200f6d58c54SBarry Smith       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2201f6d58c54SBarry Smith     }
2202f6d58c54SBarry Smith 
2203f6d58c54SBarry Smith     /* put in upper diagonal portion */
2204f6d58c54SBarry Smith     while (m-- > 0) {
2205f6d58c54SBarry Smith       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2206f6d58c54SBarry Smith     }
2207f6d58c54SBarry Smith   }
2208e32f2f54SBarry Smith   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2209f6d58c54SBarry Smith 
2210f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2211f6d58c54SBarry Smith     Gather all column indices to all processors
2212f6d58c54SBarry Smith   */
2213f6d58c54SBarry Smith   for (i=0; i<size; i++) {
2214f6d58c54SBarry Smith     recvcounts[i] = 0;
2215f6d58c54SBarry Smith     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2216f6d58c54SBarry Smith       recvcounts[i] += lens[j];
2217f6d58c54SBarry Smith     }
2218f6d58c54SBarry Smith   }
2219f6d58c54SBarry Smith   displs[0] = 0;
2220f6d58c54SBarry Smith   for (i=1; i<size; i++) {
2221f6d58c54SBarry Smith     displs[i] = displs[i-1] + recvcounts[i-1];
2222f6d58c54SBarry Smith   }
2223f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE)
2224ffc4695bSBarry Smith   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2225f6d58c54SBarry Smith #else
2226ffc4695bSBarry Smith   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2227f6d58c54SBarry Smith #endif
2228f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2229f6d58c54SBarry Smith     Assemble the matrix into useable form (note numerical values not yet set)
2230f6d58c54SBarry Smith   */
2231f6d58c54SBarry Smith   /* set the b->ilen (length of each row) values */
2232580bdb30SBarry Smith   ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
2233f6d58c54SBarry Smith   /* set the b->i indices */
2234f6d58c54SBarry Smith   b->i[0] = 0;
2235f6d58c54SBarry Smith   for (i=1; i<=A->rmap->N/bs; i++) {
2236f6d58c54SBarry Smith     b->i[i] = b->i[i-1] + lens[i-1];
2237f6d58c54SBarry Smith   }
2238f6d58c54SBarry Smith   ierr = PetscFree(lens);CHKERRQ(ierr);
2239f6d58c54SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2240f6d58c54SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2241f6d58c54SBarry Smith   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2242f6d58c54SBarry Smith 
2243f6d58c54SBarry Smith   if (A->symmetric) {
2244f6d58c54SBarry Smith     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2245f6d58c54SBarry Smith   } else if (A->hermitian) {
2246f6d58c54SBarry Smith     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2247f6d58c54SBarry Smith   } else if (A->structurally_symmetric) {
2248f6d58c54SBarry Smith     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2249f6d58c54SBarry Smith   }
2250f6d58c54SBarry Smith   *newmat = B;
2251f6d58c54SBarry Smith   PetscFunctionReturn(0);
2252f6d58c54SBarry Smith }
2253f6d58c54SBarry Smith 
2254b1a666ecSBarry Smith PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2255b1a666ecSBarry Smith {
2256b1a666ecSBarry Smith   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2257b1a666ecSBarry Smith   PetscErrorCode ierr;
2258f4259b30SLisandro Dalcin   Vec            bb1 = NULL;
2259b1a666ecSBarry Smith 
2260b1a666ecSBarry Smith   PetscFunctionBegin;
2261b1a666ecSBarry Smith   if (flag == SOR_APPLY_UPPER) {
2262b1a666ecSBarry Smith     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2263b1a666ecSBarry Smith     PetscFunctionReturn(0);
2264b1a666ecSBarry Smith   }
2265b1a666ecSBarry Smith 
22664e980039SJed Brown   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
22674e980039SJed Brown     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
22684e980039SJed Brown   }
22694e980039SJed Brown 
2270b1a666ecSBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2271b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
2272b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2273b1a666ecSBarry Smith       its--;
2274b1a666ecSBarry Smith     }
2275b1a666ecSBarry Smith 
2276b1a666ecSBarry Smith     while (its--) {
2277b1a666ecSBarry Smith       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2278b1a666ecSBarry Smith       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2279b1a666ecSBarry Smith 
2280b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
2281b1a666ecSBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2282b1a666ecSBarry Smith       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2283b1a666ecSBarry Smith 
2284b1a666ecSBarry Smith       /* local sweep */
2285b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2286b1a666ecSBarry Smith     }
2287b1a666ecSBarry Smith   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2288b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
2289b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2290b1a666ecSBarry Smith       its--;
2291b1a666ecSBarry Smith     }
2292b1a666ecSBarry Smith     while (its--) {
2293b1a666ecSBarry Smith       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2294b1a666ecSBarry Smith       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2295b1a666ecSBarry Smith 
2296b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
2297b1a666ecSBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2298b1a666ecSBarry Smith       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2299b1a666ecSBarry Smith 
2300b1a666ecSBarry Smith       /* local sweep */
2301b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2302b1a666ecSBarry Smith     }
2303b1a666ecSBarry Smith   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2304b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
2305b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2306b1a666ecSBarry Smith       its--;
2307b1a666ecSBarry Smith     }
2308b1a666ecSBarry Smith     while (its--) {
2309b1a666ecSBarry Smith       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2310b1a666ecSBarry Smith       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2311b1a666ecSBarry Smith 
2312b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
2313b1a666ecSBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2314b1a666ecSBarry Smith       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2315b1a666ecSBarry Smith 
2316b1a666ecSBarry Smith       /* local sweep */
2317b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2318b1a666ecSBarry Smith     }
2319ce94432eSBarry Smith   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2320b1a666ecSBarry Smith 
23216bf464f9SBarry Smith   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2322b1a666ecSBarry Smith   PetscFunctionReturn(0);
2323b1a666ecSBarry Smith }
2324b1a666ecSBarry Smith 
2325*a873a8cdSSam Reynolds PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,ReductionType type,PetscReal *reductions)
232647f7623dSRémi Lacroix {
232747f7623dSRémi Lacroix   PetscErrorCode ierr;
232847f7623dSRémi Lacroix   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2329*a873a8cdSSam Reynolds   PetscInt       m,N,i,*garray = aij->garray;
233047f7623dSRémi Lacroix   PetscInt       ib,jb,bs = A->rmap->bs;
233147f7623dSRémi Lacroix   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
233247f7623dSRémi Lacroix   MatScalar      *a_val = a_aij->a;
233347f7623dSRémi Lacroix   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
233447f7623dSRémi Lacroix   MatScalar      *b_val = b_aij->a;
233547f7623dSRémi Lacroix   PetscReal      *work;
233647f7623dSRémi Lacroix 
233747f7623dSRémi Lacroix   PetscFunctionBegin;
2338*a873a8cdSSam Reynolds   ierr = MatGetSize(A,&m,&N);CHKERRQ(ierr);
23391795a4d1SJed Brown   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2340*a873a8cdSSam Reynolds   if (type == REDUCTION_NORM_2) {
234147f7623dSRémi Lacroix     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
234247f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
234347f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
234447f7623dSRémi Lacroix           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
234547f7623dSRémi Lacroix           a_val++;
234647f7623dSRémi Lacroix         }
234747f7623dSRémi Lacroix       }
234847f7623dSRémi Lacroix     }
234947f7623dSRémi Lacroix     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
235047f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
235147f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
235247f7623dSRémi Lacroix           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
235347f7623dSRémi Lacroix           b_val++;
235447f7623dSRémi Lacroix         }
235547f7623dSRémi Lacroix       }
235647f7623dSRémi Lacroix     }
2357*a873a8cdSSam Reynolds   } else if (type == REDUCTION_NORM_1) {
235847f7623dSRémi Lacroix     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
235947f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
236047f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
236147f7623dSRémi Lacroix           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
236247f7623dSRémi Lacroix           a_val++;
236347f7623dSRémi Lacroix         }
236447f7623dSRémi Lacroix       }
236547f7623dSRémi Lacroix     }
236647f7623dSRémi Lacroix     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
236747f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
236847f7623dSRémi Lacroix        for (ib=0; ib<bs; ib++) {
236947f7623dSRémi Lacroix           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
237047f7623dSRémi Lacroix           b_val++;
237147f7623dSRémi Lacroix         }
237247f7623dSRémi Lacroix       }
237347f7623dSRémi Lacroix     }
2374*a873a8cdSSam Reynolds   } else if (type == REDUCTION_NORM_INFINITY) {
237547f7623dSRémi Lacroix     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
237647f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
237747f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
237847f7623dSRémi Lacroix           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
237947f7623dSRémi Lacroix           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
238047f7623dSRémi Lacroix           a_val++;
238147f7623dSRémi Lacroix         }
238247f7623dSRémi Lacroix       }
238347f7623dSRémi Lacroix     }
238447f7623dSRémi Lacroix     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
238547f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
238647f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
238747f7623dSRémi Lacroix           int col = garray[b_aij->j[i]] * bs + jb;
238847f7623dSRémi Lacroix           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
238947f7623dSRémi Lacroix           b_val++;
239047f7623dSRémi Lacroix         }
239147f7623dSRémi Lacroix       }
239247f7623dSRémi Lacroix     }
2393*a873a8cdSSam Reynolds   } else if (type == REDUCTION_SUM || REDUCTION_MEAN) {
2394*a873a8cdSSam Reynolds     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2395*a873a8cdSSam Reynolds       for (jb=0; jb<bs; jb++) {
2396*a873a8cdSSam Reynolds         for (ib=0; ib<bs; ib++) {
2397*a873a8cdSSam Reynolds           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += *a_val;
2398*a873a8cdSSam Reynolds           a_val++;
2399*a873a8cdSSam Reynolds         }
2400*a873a8cdSSam Reynolds       }
2401*a873a8cdSSam Reynolds     }
2402*a873a8cdSSam Reynolds     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2403*a873a8cdSSam Reynolds       for (jb=0; jb<bs; jb++) {
2404*a873a8cdSSam Reynolds        for (ib=0; ib<bs; ib++) {
2405*a873a8cdSSam Reynolds           work[garray[b_aij->j[i]] * bs + jb] += *b_val;
2406*a873a8cdSSam Reynolds           b_val++;
2407*a873a8cdSSam Reynolds         }
2408*a873a8cdSSam Reynolds       }
2409*a873a8cdSSam Reynolds     }
2410*a873a8cdSSam Reynolds   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown ReductionType");
2411*a873a8cdSSam Reynolds   if (type == REDUCTION_NORM_INFINITY) {
2412*a873a8cdSSam Reynolds     ierr = MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
241347f7623dSRémi Lacroix   } else {
2414*a873a8cdSSam Reynolds     ierr = MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
241547f7623dSRémi Lacroix   }
241647f7623dSRémi Lacroix   ierr = PetscFree(work);CHKERRQ(ierr);
2417*a873a8cdSSam Reynolds   if (type == REDUCTION_NORM_2) {
2418*a873a8cdSSam Reynolds     for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2419*a873a8cdSSam Reynolds   } else if (type == REDUCTION_MEAN) {
2420*a873a8cdSSam Reynolds     for (i=0; i<N; i++) reductions[i] /= m;
242147f7623dSRémi Lacroix   }
242247f7623dSRémi Lacroix   PetscFunctionReturn(0);
242347f7623dSRémi Lacroix }
242447f7623dSRémi Lacroix 
2425713ccfa9SJed Brown PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2426bbead8a2SBarry Smith {
2427bbead8a2SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2428bbead8a2SBarry Smith   PetscErrorCode ierr;
2429bbead8a2SBarry Smith 
2430bbead8a2SBarry Smith   PetscFunctionBegin;
2431bbead8a2SBarry Smith   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
24327b6c816cSBarry Smith   A->factorerrortype             = a->A->factorerrortype;
24337b6c816cSBarry Smith   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
24347b6c816cSBarry Smith   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2435bbead8a2SBarry Smith   PetscFunctionReturn(0);
2436bbead8a2SBarry Smith }
2437bbead8a2SBarry Smith 
24387d68702bSBarry Smith PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
24397d68702bSBarry Smith {
24407d68702bSBarry Smith   PetscErrorCode ierr;
24417d68702bSBarry Smith   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
24426f33a894SBarry Smith   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
24437d68702bSBarry Smith 
24447d68702bSBarry Smith   PetscFunctionBegin;
24456f33a894SBarry Smith   if (!Y->preallocated) {
24467d68702bSBarry Smith     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
24476f33a894SBarry Smith   } else if (!aij->nz) {
2448b83222d8SBarry Smith     PetscInt nonew = aij->nonew;
24496f33a894SBarry Smith     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2450b83222d8SBarry Smith     aij->nonew = nonew;
24517d68702bSBarry Smith   }
24527d68702bSBarry Smith   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
24537d68702bSBarry Smith   PetscFunctionReturn(0);
24547d68702bSBarry Smith }
24558c7482ecSBarry Smith 
24563b49f96aSBarry Smith PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
24573b49f96aSBarry Smith {
24583b49f96aSBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
24593b49f96aSBarry Smith   PetscErrorCode ierr;
24603b49f96aSBarry Smith 
24613b49f96aSBarry Smith   PetscFunctionBegin;
24623b49f96aSBarry Smith   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
24633b49f96aSBarry Smith   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
24643b49f96aSBarry Smith   if (d) {
24653b49f96aSBarry Smith     PetscInt rstart;
24663b49f96aSBarry Smith     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
24673b49f96aSBarry Smith     *d += rstart/A->rmap->bs;
24683b49f96aSBarry Smith 
24693b49f96aSBarry Smith   }
24703b49f96aSBarry Smith   PetscFunctionReturn(0);
24713b49f96aSBarry Smith }
24723b49f96aSBarry Smith 
2473a5b7ff6bSBarry Smith PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2474a5b7ff6bSBarry Smith {
2475a5b7ff6bSBarry Smith   PetscFunctionBegin;
2476a5b7ff6bSBarry Smith   *a = ((Mat_MPIBAIJ*)A->data)->A;
2477a5b7ff6bSBarry Smith   PetscFunctionReturn(0);
2478a5b7ff6bSBarry Smith }
2479a5b7ff6bSBarry Smith 
248079bdfe76SSatish Balay /* -------------------------------------------------------------------*/
24813964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2482cc2dc46cSBarry Smith                                        MatGetRow_MPIBAIJ,
2483cc2dc46cSBarry Smith                                        MatRestoreRow_MPIBAIJ,
2484cc2dc46cSBarry Smith                                        MatMult_MPIBAIJ,
248597304618SKris Buschelman                                 /* 4*/ MatMultAdd_MPIBAIJ,
24867c922b88SBarry Smith                                        MatMultTranspose_MPIBAIJ,
24877c922b88SBarry Smith                                        MatMultTransposeAdd_MPIBAIJ,
2488f4259b30SLisandro Dalcin                                        NULL,
2489f4259b30SLisandro Dalcin                                        NULL,
2490f4259b30SLisandro Dalcin                                        NULL,
2491f4259b30SLisandro Dalcin                                 /*10*/ NULL,
2492f4259b30SLisandro Dalcin                                        NULL,
2493f4259b30SLisandro Dalcin                                        NULL,
2494b1a666ecSBarry Smith                                        MatSOR_MPIBAIJ,
2495cc2dc46cSBarry Smith                                        MatTranspose_MPIBAIJ,
249697304618SKris Buschelman                                 /*15*/ MatGetInfo_MPIBAIJ,
24977fc3c18eSBarry Smith                                        MatEqual_MPIBAIJ,
2498cc2dc46cSBarry Smith                                        MatGetDiagonal_MPIBAIJ,
2499cc2dc46cSBarry Smith                                        MatDiagonalScale_MPIBAIJ,
2500cc2dc46cSBarry Smith                                        MatNorm_MPIBAIJ,
250197304618SKris Buschelman                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2502cc2dc46cSBarry Smith                                        MatAssemblyEnd_MPIBAIJ,
2503cc2dc46cSBarry Smith                                        MatSetOption_MPIBAIJ,
2504cc2dc46cSBarry Smith                                        MatZeroEntries_MPIBAIJ,
2505d519adbfSMatthew Knepley                                 /*24*/ MatZeroRows_MPIBAIJ,
2506f4259b30SLisandro Dalcin                                        NULL,
2507f4259b30SLisandro Dalcin                                        NULL,
2508f4259b30SLisandro Dalcin                                        NULL,
2509f4259b30SLisandro Dalcin                                        NULL,
25104994cf47SJed Brown                                 /*29*/ MatSetUp_MPIBAIJ,
2511f4259b30SLisandro Dalcin                                        NULL,
2512f4259b30SLisandro Dalcin                                        NULL,
2513a5b7ff6bSBarry Smith                                        MatGetDiagonalBlock_MPIBAIJ,
2514f4259b30SLisandro Dalcin                                        NULL,
2515d519adbfSMatthew Knepley                                 /*34*/ MatDuplicate_MPIBAIJ,
2516f4259b30SLisandro Dalcin                                        NULL,
2517f4259b30SLisandro Dalcin                                        NULL,
2518f4259b30SLisandro Dalcin                                        NULL,
2519f4259b30SLisandro Dalcin                                        NULL,
2520d519adbfSMatthew Knepley                                 /*39*/ MatAXPY_MPIBAIJ,
25217dae84e0SHong Zhang                                        MatCreateSubMatrices_MPIBAIJ,
2522cc2dc46cSBarry Smith                                        MatIncreaseOverlap_MPIBAIJ,
2523cc2dc46cSBarry Smith                                        MatGetValues_MPIBAIJ,
25243c896bc6SHong Zhang                                        MatCopy_MPIBAIJ,
2525f4259b30SLisandro Dalcin                                 /*44*/ NULL,
2526cc2dc46cSBarry Smith                                        MatScale_MPIBAIJ,
25277d68702bSBarry Smith                                        MatShift_MPIBAIJ,
2528f4259b30SLisandro Dalcin                                        NULL,
25296f0a72daSMatthew G. Knepley                                        MatZeroRowsColumns_MPIBAIJ,
2530f4259b30SLisandro Dalcin                                 /*49*/ NULL,
2531f4259b30SLisandro Dalcin                                        NULL,
2532f4259b30SLisandro Dalcin                                        NULL,
2533f4259b30SLisandro Dalcin                                        NULL,
2534f4259b30SLisandro Dalcin                                        NULL,
253593dfae19SHong Zhang                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2536f4259b30SLisandro Dalcin                                        NULL,
2537cc2dc46cSBarry Smith                                        MatSetUnfactored_MPIBAIJ,
253882094794SBarry Smith                                        MatPermute_MPIBAIJ,
2539cc2dc46cSBarry Smith                                        MatSetValuesBlocked_MPIBAIJ,
25407dae84e0SHong Zhang                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2541f14a1c24SBarry Smith                                        MatDestroy_MPIBAIJ,
2542f14a1c24SBarry Smith                                        MatView_MPIBAIJ,
2543f4259b30SLisandro Dalcin                                        NULL,
2544f4259b30SLisandro Dalcin                                        NULL,
2545f4259b30SLisandro Dalcin                                 /*64*/ NULL,
2546f4259b30SLisandro Dalcin                                        NULL,
2547f4259b30SLisandro Dalcin                                        NULL,
2548f4259b30SLisandro Dalcin                                        NULL,
2549f4259b30SLisandro Dalcin                                        NULL,
2550d519adbfSMatthew Knepley                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2551f4259b30SLisandro Dalcin                                        NULL,
2552f4259b30SLisandro Dalcin                                        NULL,
2553f4259b30SLisandro Dalcin                                        NULL,
2554f4259b30SLisandro Dalcin                                        NULL,
2555f4259b30SLisandro Dalcin                                 /*74*/ NULL,
2556f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
2557f4259b30SLisandro Dalcin                                        NULL,
2558f4259b30SLisandro Dalcin                                        NULL,
2559f4259b30SLisandro Dalcin                                        NULL,
2560f4259b30SLisandro Dalcin                                 /*79*/ NULL,
2561f4259b30SLisandro Dalcin                                        NULL,
2562f4259b30SLisandro Dalcin                                        NULL,
2563f4259b30SLisandro Dalcin                                        NULL,
25645bba2384SShri Abhyankar                                        MatLoad_MPIBAIJ,
2565f4259b30SLisandro Dalcin                                 /*84*/ NULL,
2566f4259b30SLisandro Dalcin                                        NULL,
2567f4259b30SLisandro Dalcin                                        NULL,
2568f4259b30SLisandro Dalcin                                        NULL,
2569f4259b30SLisandro Dalcin                                        NULL,
2570f4259b30SLisandro Dalcin                                 /*89*/ NULL,
2571f4259b30SLisandro Dalcin                                        NULL,
2572f4259b30SLisandro Dalcin                                        NULL,
2573f4259b30SLisandro Dalcin                                        NULL,
2574f4259b30SLisandro Dalcin                                        NULL,
2575f4259b30SLisandro Dalcin                                 /*94*/ NULL,
2576f4259b30SLisandro Dalcin                                        NULL,
2577f4259b30SLisandro Dalcin                                        NULL,
2578f4259b30SLisandro Dalcin                                        NULL,
2579f4259b30SLisandro Dalcin                                        NULL,
2580f4259b30SLisandro Dalcin                                 /*99*/ NULL,
2581f4259b30SLisandro Dalcin                                        NULL,
2582f4259b30SLisandro Dalcin                                        NULL,
25832726fb6dSPierre Jolivet                                        MatConjugate_MPIBAIJ,
2584f4259b30SLisandro Dalcin                                        NULL,
2585f4259b30SLisandro Dalcin                                 /*104*/NULL,
258699cafbc1SBarry Smith                                        MatRealPart_MPIBAIJ,
25878c7482ecSBarry Smith                                        MatImaginaryPart_MPIBAIJ,
2588f4259b30SLisandro Dalcin                                        NULL,
2589f4259b30SLisandro Dalcin                                        NULL,
2590f4259b30SLisandro Dalcin                                 /*109*/NULL,
2591f4259b30SLisandro Dalcin                                        NULL,
2592f4259b30SLisandro Dalcin                                        NULL,
2593f4259b30SLisandro Dalcin                                        NULL,
25943b49f96aSBarry Smith                                        MatMissingDiagonal_MPIBAIJ,
2595d1adec66SJed Brown                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2596f4259b30SLisandro Dalcin                                        NULL,
25974683f7a4SShri Abhyankar                                        MatGetGhosts_MPIBAIJ,
2598f4259b30SLisandro Dalcin                                        NULL,
2599f4259b30SLisandro Dalcin                                        NULL,
2600f4259b30SLisandro Dalcin                                 /*119*/NULL,
2601f4259b30SLisandro Dalcin                                        NULL,
2602f4259b30SLisandro Dalcin                                        NULL,
2603f4259b30SLisandro Dalcin                                        NULL,
2604e8271787SHong Zhang                                        MatGetMultiProcBlock_MPIBAIJ,
2605f4259b30SLisandro Dalcin                                 /*124*/NULL,
2606*a873a8cdSSam Reynolds                                        MatGetColumnReductions_MPIBAIJ,
26073964eb88SJed Brown                                        MatInvertBlockDiagonal_MPIBAIJ,
2608f4259b30SLisandro Dalcin                                        NULL,
2609f4259b30SLisandro Dalcin                                        NULL,
2610f4259b30SLisandro Dalcin                                /*129*/ NULL,
2611f4259b30SLisandro Dalcin                                        NULL,
2612f4259b30SLisandro Dalcin                                        NULL,
2613f4259b30SLisandro Dalcin                                        NULL,
2614f4259b30SLisandro Dalcin                                        NULL,
2615f4259b30SLisandro Dalcin                                /*134*/ NULL,
2616f4259b30SLisandro Dalcin                                        NULL,
2617f4259b30SLisandro Dalcin                                        NULL,
2618f4259b30SLisandro Dalcin                                        NULL,
2619f4259b30SLisandro Dalcin                                        NULL,
262046533700Sstefano_zampini                                /*139*/ MatSetBlockSizes_Default,
2621f4259b30SLisandro Dalcin                                        NULL,
2622f4259b30SLisandro Dalcin                                        NULL,
2623bdf6f3fcSHong Zhang                                        MatFDColoringSetUp_MPIXAIJ,
2624f4259b30SLisandro Dalcin                                        NULL,
2625bdf6f3fcSHong Zhang                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
26268c7482ecSBarry Smith };
262779bdfe76SSatish Balay 
2628cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2629c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2630d94109b8SHong Zhang 
2631cf12db73SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2632aac34f13SBarry Smith {
2633b8d659d7SLisandro Dalcin   PetscInt       m,rstart,cstart,cend;
2634f4259b30SLisandro Dalcin   PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
2635f4259b30SLisandro Dalcin   const PetscInt *JJ    =NULL;
2636f4259b30SLisandro Dalcin   PetscScalar    *values=NULL;
2637d47bf9aaSJed Brown   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2638aac34f13SBarry Smith   PetscErrorCode ierr;
26393bd0feecSPierre Jolivet   PetscBool      nooffprocentries;
2640aac34f13SBarry Smith 
2641aac34f13SBarry Smith   PetscFunctionBegin;
264226283091SBarry Smith   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
264326283091SBarry Smith   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
264426283091SBarry Smith   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
264526283091SBarry Smith   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2646e02043d6SBarry Smith   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2647d0f46423SBarry Smith   m      = B->rmap->n/bs;
2648d0f46423SBarry Smith   rstart = B->rmap->rstart/bs;
2649d0f46423SBarry Smith   cstart = B->cmap->rstart/bs;
2650d0f46423SBarry Smith   cend   = B->cmap->rend/bs;
2651b8d659d7SLisandro Dalcin 
2652e32f2f54SBarry Smith   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2653dcca6d9dSJed Brown   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2654aac34f13SBarry Smith   for (i=0; i<m; i++) {
2655cf12db73SBarry Smith     nz = ii[i+1] - ii[i];
2656e32f2f54SBarry Smith     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2657b8d659d7SLisandro Dalcin     nz_max = PetscMax(nz_max,nz);
265837cd3c0dSBarry Smith     dlen   = 0;
265937cd3c0dSBarry Smith     olen   = 0;
2660cf12db73SBarry Smith     JJ     = jj + ii[i];
2661b8d659d7SLisandro Dalcin     for (j=0; j<nz; j++) {
266237cd3c0dSBarry Smith       if (*JJ < cstart || *JJ >= cend) olen++;
266337cd3c0dSBarry Smith       else dlen++;
2664aac34f13SBarry Smith       JJ++;
2665aac34f13SBarry Smith     }
266637cd3c0dSBarry Smith     d_nnz[i] = dlen;
266737cd3c0dSBarry Smith     o_nnz[i] = olen;
2668aac34f13SBarry Smith   }
2669aac34f13SBarry Smith   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2670fca92195SBarry Smith   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2671aac34f13SBarry Smith 
2672b8d659d7SLisandro Dalcin   values = (PetscScalar*)V;
2673b8d659d7SLisandro Dalcin   if (!values) {
267437cd3c0dSBarry Smith     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2675b8d659d7SLisandro Dalcin   }
2676b8d659d7SLisandro Dalcin   for (i=0; i<m; i++) {
2677b8d659d7SLisandro Dalcin     PetscInt          row    = i + rstart;
2678cf12db73SBarry Smith     PetscInt          ncols  = ii[i+1] - ii[i];
2679cf12db73SBarry Smith     const PetscInt    *icols = jj + ii[i];
2680bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2681cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2682b8d659d7SLisandro Dalcin       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
26833adadaf3SJed Brown     } else {                    /* block ordering does not match so we can only insert one block at a time. */
26843adadaf3SJed Brown       PetscInt j;
26853adadaf3SJed Brown       for (j=0; j<ncols; j++) {
26863adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
26873adadaf3SJed Brown         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
26883adadaf3SJed Brown       }
26893adadaf3SJed Brown     }
2690aac34f13SBarry Smith   }
2691aac34f13SBarry Smith 
2692b8d659d7SLisandro Dalcin   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
26933bd0feecSPierre Jolivet   nooffprocentries    = B->nooffprocentries;
26943bd0feecSPierre Jolivet   B->nooffprocentries = PETSC_TRUE;
2695aac34f13SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2696aac34f13SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
26973bd0feecSPierre Jolivet   B->nooffprocentries = nooffprocentries;
26983bd0feecSPierre Jolivet 
26997827cd58SJed Brown   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2700aac34f13SBarry Smith   PetscFunctionReturn(0);
2701aac34f13SBarry Smith }
2702aac34f13SBarry Smith 
2703aac34f13SBarry Smith /*@C
2704664954b6SBarry Smith    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2705aac34f13SBarry Smith 
2706d083f849SBarry Smith    Collective
2707aac34f13SBarry Smith 
2708aac34f13SBarry Smith    Input Parameters:
27091c4f3114SJed Brown +  B - the matrix
2710dfb205c3SBarry Smith .  bs - the block size
2711aac34f13SBarry Smith .  i - the indices into j for the start of each local block row (starts with zero)
2712aac34f13SBarry Smith .  j - the block column indices for each local block row (starts with zero); these must be sorted for each row
2713aac34f13SBarry Smith -  v - optional values in the matrix
2714aac34f13SBarry Smith 
2715664954b6SBarry Smith    Level: advanced
2716aac34f13SBarry Smith 
271795452b02SPatrick Sanan    Notes:
271895452b02SPatrick Sanan     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
27193adadaf3SJed Brown    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
27203adadaf3SJed Brown    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
27213adadaf3SJed Brown    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
27223adadaf3SJed Brown    block column and the second index is over columns within a block.
27233adadaf3SJed Brown 
2724664954b6SBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2725664954b6SBarry Smith 
27263adadaf3SJed Brown .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2727aac34f13SBarry Smith @*/
27287087cfbeSBarry Smith PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2729aac34f13SBarry Smith {
27304ac538c5SBarry Smith   PetscErrorCode ierr;
2731aac34f13SBarry Smith 
2732aac34f13SBarry Smith   PetscFunctionBegin;
  /* argument validation: B is argument 1 (checked as a Mat object and for its type), bs is argument 2
     (checked through the logically-collective integer macro) */
27336ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
27346ba663aaSJed Brown   PetscValidType(B,1);
27356ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch by name: call the function composed on B as "MatMPIBAIJSetPreallocationCSR_C";
     MatCreate_MPIBAIJ() composes MatMPIBAIJSetPreallocationCSR_MPIBAIJ() under that name.
     NOTE(review): behaviour for matrix types that compose nothing under this name is decided by
     PetscTryMethod -- presumably a silent no-op; confirm against the macro definition */
27364ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2737aac34f13SBarry Smith   PetscFunctionReturn(0);
2738aac34f13SBarry Smith }
2739aac34f13SBarry Smith 
2740b2573a8aSBarry Smith PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2741a23d5eceSKris Buschelman {
2742a23d5eceSKris Buschelman   Mat_MPIBAIJ    *b;
2743dfbe8321SBarry Smith   PetscErrorCode ierr;
2744535b19f3SBarry Smith   PetscInt       i;
27455d2a9ed1SStefano Zampini   PetscMPIInt    size;
2746a23d5eceSKris Buschelman 
2747a23d5eceSKris Buschelman   PetscFunctionBegin;
274833d57670SJed Brown   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
274926283091SBarry Smith   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
275026283091SBarry Smith   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2751e02043d6SBarry Smith   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2752899cda47SBarry Smith 
2753a23d5eceSKris Buschelman   if (d_nnz) {
2754d0f46423SBarry Smith     for (i=0; i<B->rmap->n/bs; i++) {
2755e32f2f54SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2756a23d5eceSKris Buschelman     }
2757a23d5eceSKris Buschelman   }
2758a23d5eceSKris Buschelman   if (o_nnz) {
2759d0f46423SBarry Smith     for (i=0; i<B->rmap->n/bs; i++) {
2760e32f2f54SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2761a23d5eceSKris Buschelman     }
2762a23d5eceSKris Buschelman   }
2763a23d5eceSKris Buschelman 
2764a23d5eceSKris Buschelman   b      = (Mat_MPIBAIJ*)B->data;
2765a23d5eceSKris Buschelman   b->bs2 = bs*bs;
2766d0f46423SBarry Smith   b->mbs = B->rmap->n/bs;
2767d0f46423SBarry Smith   b->nbs = B->cmap->n/bs;
2768d0f46423SBarry Smith   b->Mbs = B->rmap->N/bs;
2769d0f46423SBarry Smith   b->Nbs = B->cmap->N/bs;
2770a23d5eceSKris Buschelman 
2771a23d5eceSKris Buschelman   for (i=0; i<=b->size; i++) {
2772d0f46423SBarry Smith     b->rangebs[i] = B->rmap->range[i]/bs;
2773a23d5eceSKris Buschelman   }
2774d0f46423SBarry Smith   b->rstartbs = B->rmap->rstart/bs;
2775d0f46423SBarry Smith   b->rendbs   = B->rmap->rend/bs;
2776d0f46423SBarry Smith   b->cstartbs = B->cmap->rstart/bs;
2777d0f46423SBarry Smith   b->cendbs   = B->cmap->rend/bs;
2778a23d5eceSKris Buschelman 
2779cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE)
2780cb7b82ddSBarry Smith   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2781cb7b82ddSBarry Smith #else
2782cb7b82ddSBarry Smith   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2783cb7b82ddSBarry Smith #endif
2784cb7b82ddSBarry Smith   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2785cb7b82ddSBarry Smith   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2786cb7b82ddSBarry Smith   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2787cb7b82ddSBarry Smith 
2788cb7b82ddSBarry Smith   /* Because the B will have been resized we simply destroy it and create a new one each time */
2789ffc4695bSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2790cb7b82ddSBarry Smith   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2791cb7b82ddSBarry Smith   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
27925d2a9ed1SStefano Zampini   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2793cb7b82ddSBarry Smith   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2794cb7b82ddSBarry Smith   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2795cb7b82ddSBarry Smith 
2796526dfc15SBarry Smith   if (!B->preallocated) {
2797f69a0ea3SMatthew Knepley     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2798d0f46423SBarry Smith     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
27999c097c71SKris Buschelman     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
28003bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2801ce94432eSBarry Smith     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2802526dfc15SBarry Smith   }
2803a23d5eceSKris Buschelman 
2804526dfc15SBarry Smith   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2805526dfc15SBarry Smith   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2806526dfc15SBarry Smith   B->preallocated  = PETSC_TRUE;
2807cb7b82ddSBarry Smith   B->was_assembled = PETSC_FALSE;
2808cb7b82ddSBarry Smith   B->assembled     = PETSC_FALSE;
2809a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2810a23d5eceSKris Buschelman }
2811a23d5eceSKris Buschelman 
28127087cfbeSBarry Smith extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
28137087cfbeSBarry Smith extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
28145bf65638SKris Buschelman 
2815cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
281682094794SBarry Smith {
281782094794SBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
281882094794SBarry Smith   PetscErrorCode ierr;
281982094794SBarry Smith   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
282082094794SBarry Smith   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
282182094794SBarry Smith   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
282282094794SBarry Smith 
282382094794SBarry Smith   PetscFunctionBegin;
2824854ce69bSBarry Smith   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
282582094794SBarry Smith   ii[0] = 0;
282682094794SBarry Smith   for (i=0; i<M; i++) {
2827e32f2f54SBarry Smith     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2828e32f2f54SBarry Smith     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
282982094794SBarry Smith     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
28305ee9ba1cSJed Brown     /* remove one from count of matrix has diagonal */
28315ee9ba1cSJed Brown     for (j=id[i]; j<id[i+1]; j++) {
28325ee9ba1cSJed Brown       if (jd[j] == i) {ii[i+1]--;break;}
28335ee9ba1cSJed Brown     }
283482094794SBarry Smith   }
2835785e854fSJed Brown   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
283682094794SBarry Smith   cnt  = 0;
283782094794SBarry Smith   for (i=0; i<M; i++) {
283882094794SBarry Smith     for (j=io[i]; j<io[i+1]; j++) {
283982094794SBarry Smith       if (garray[jo[j]] > rstart) break;
284082094794SBarry Smith       jj[cnt++] = garray[jo[j]];
284182094794SBarry Smith     }
284282094794SBarry Smith     for (k=id[i]; k<id[i+1]; k++) {
28435ee9ba1cSJed Brown       if (jd[k] != i) {
284482094794SBarry Smith         jj[cnt++] = rstart + jd[k];
284582094794SBarry Smith       }
28465ee9ba1cSJed Brown     }
284782094794SBarry Smith     for (; j<io[i+1]; j++) {
284882094794SBarry Smith       jj[cnt++] = garray[jo[j]];
284982094794SBarry Smith     }
285082094794SBarry Smith   }
2851ce94432eSBarry Smith   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
285282094794SBarry Smith   PetscFunctionReturn(0);
285382094794SBarry Smith }
285482094794SBarry Smith 
2855c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
285662471d69SBarry Smith 
2857cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2858b2573a8aSBarry Smith 
2859cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
286062471d69SBarry Smith {
286162471d69SBarry Smith   PetscErrorCode ierr;
286262471d69SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
286362471d69SBarry Smith   Mat            B;
286485a69837SSatish Balay   Mat_MPIAIJ     *b;
286562471d69SBarry Smith 
286662471d69SBarry Smith   PetscFunctionBegin;
2867ce94432eSBarry Smith   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
286862471d69SBarry Smith 
28690f6d62edSLisandro Dalcin   if (reuse == MAT_REUSE_MATRIX) {
28700f6d62edSLisandro Dalcin     B = *newmat;
28710f6d62edSLisandro Dalcin   } else {
2872ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
28736d0a4a0eSHong Zhang     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2874f090d951SRémi Lacroix     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2875f090d951SRémi Lacroix     ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
28760298fd71SBarry Smith     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
28770298fd71SBarry Smith     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
28780f6d62edSLisandro Dalcin   }
287962471d69SBarry Smith   b = (Mat_MPIAIJ*) B->data;
288062471d69SBarry Smith 
28810f6d62edSLisandro Dalcin   if (reuse == MAT_REUSE_MATRIX) {
28820f6d62edSLisandro Dalcin     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
28830f6d62edSLisandro Dalcin     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
28840f6d62edSLisandro Dalcin   } else {
28856bf464f9SBarry Smith     ierr = MatDestroy(&b->A);CHKERRQ(ierr);
28866bf464f9SBarry Smith     ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2887ab9863d7SBarry Smith     ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
288862471d69SBarry Smith     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
288962471d69SBarry Smith     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
28906a719282SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
28916a719282SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
28920f6d62edSLisandro Dalcin   }
28930f6d62edSLisandro Dalcin   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
28940f6d62edSLisandro Dalcin   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
28950f6d62edSLisandro Dalcin 
2896511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
289728be2f97SBarry Smith     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
289862471d69SBarry Smith   } else {
289962471d69SBarry Smith    *newmat = B;
290062471d69SBarry Smith   }
290162471d69SBarry Smith   PetscFunctionReturn(0);
290262471d69SBarry Smith }
290362471d69SBarry Smith 
29040bad9183SKris Buschelman /*MC
2905fafad747SKris Buschelman    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
29060bad9183SKris Buschelman 
29070bad9183SKris Buschelman    Options Database Keys:
29088c07d4e3SBarry Smith + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
29098c07d4e3SBarry Smith . -mat_block_size <bs> - set the blocksize used to store the matrix
29106679dcc1SBarry Smith . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
29118c07d4e3SBarry Smith - -mat_use_hash_table <fact> - use a hash table to save time in constructing the matrix; <fact> is the hash table factor
29120bad9183SKris Buschelman 
29130bad9183SKris Buschelman    Level: beginner
29140cd7f59aSBarry Smith 
29150cd7f59aSBarry Smith    Notes:
29160cd7f59aSBarry Smith     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
29170cd7f59aSBarry Smith     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
29180bad9183SKris Buschelman 
2919fd292e60Sprj- .seealso: MatCreateBAIJ
29200bad9183SKris Buschelman M*/
29210bad9183SKris Buschelman 
2922cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2923c0cdd4a1SDahai Guo 
29248cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2925273d9f13SBarry Smith {
2926273d9f13SBarry Smith   Mat_MPIBAIJ    *b;
2927dfbe8321SBarry Smith   PetscErrorCode ierr;
292894ae4db5SBarry Smith   PetscBool      flg = PETSC_FALSE;
2929273d9f13SBarry Smith 
2930273d9f13SBarry Smith   PetscFunctionBegin;
2931b00a9115SJed Brown   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
293282502324SSatish Balay   B->data = (void*)b;
293382502324SSatish Balay 
2934273d9f13SBarry Smith   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2935273d9f13SBarry Smith   B->assembled = PETSC_FALSE;
2936273d9f13SBarry Smith 
2937273d9f13SBarry Smith   B->insertmode = NOT_SET_VALUES;
293855b25c41SPierre Jolivet   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
293955b25c41SPierre Jolivet   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRMPI(ierr);
2940273d9f13SBarry Smith 
2941273d9f13SBarry Smith   /* build local table of row and column ownerships */
2942854ce69bSBarry Smith   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
2943273d9f13SBarry Smith 
2944273d9f13SBarry Smith   /* build cache for off array entries formed */
2945ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
294626fbe8dcSKarl Rupp 
2947273d9f13SBarry Smith   b->donotstash  = PETSC_FALSE;
29480298fd71SBarry Smith   b->colmap      = NULL;
29490298fd71SBarry Smith   b->garray      = NULL;
2950273d9f13SBarry Smith   b->roworiented = PETSC_TRUE;
2951273d9f13SBarry Smith 
2952273d9f13SBarry Smith   /* stuff used in block assembly */
2953f4259b30SLisandro Dalcin   b->barray = NULL;
2954273d9f13SBarry Smith 
2955273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
2956f4259b30SLisandro Dalcin   b->lvec  = NULL;
2957f4259b30SLisandro Dalcin   b->Mvctx = NULL;
2958273d9f13SBarry Smith 
2959273d9f13SBarry Smith   /* stuff for MatGetRow() */
2960f4259b30SLisandro Dalcin   b->rowindices   = NULL;
2961f4259b30SLisandro Dalcin   b->rowvalues    = NULL;
2962273d9f13SBarry Smith   b->getrowactive = PETSC_FALSE;
2963273d9f13SBarry Smith 
2964273d9f13SBarry Smith   /* hash table stuff */
2965f4259b30SLisandro Dalcin   b->ht           = NULL;
2966f4259b30SLisandro Dalcin   b->hd           = NULL;
2967273d9f13SBarry Smith   b->ht_size      = 0;
2968273d9f13SBarry Smith   b->ht_flag      = PETSC_FALSE;
2969273d9f13SBarry Smith   b->ht_fact      = 0;
2970273d9f13SBarry Smith   b->ht_total_ct  = 0;
2971273d9f13SBarry Smith   b->ht_insert_ct = 0;
2972273d9f13SBarry Smith 
29737dae84e0SHong Zhang   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
29747a868f3eSHong Zhang   b->ijonly = PETSC_FALSE;
29757a868f3eSHong Zhang 
2976bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
2977bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
2978bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
29797ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
29807ea3e4caSstefano_zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
29817ea3e4caSstefano_zampini #endif
2982bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2983bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2984bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2985bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2986bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2987bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2988c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
298917667f90SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
299094ae4db5SBarry Smith 
299194ae4db5SBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2992abf3b562SBarry Smith   ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
299394ae4db5SBarry Smith   if (flg) {
299494ae4db5SBarry Smith     PetscReal fact = 1.39;
299594ae4db5SBarry Smith     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
299694ae4db5SBarry Smith     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
299794ae4db5SBarry Smith     if (fact <= 1.0) fact = 1.39;
299894ae4db5SBarry Smith     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
299994ae4db5SBarry Smith     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
300094ae4db5SBarry Smith   }
300194ae4db5SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3002273d9f13SBarry Smith   PetscFunctionReturn(0);
3003273d9f13SBarry Smith }
3004273d9f13SBarry Smith 
3005209238afSKris Buschelman /*MC
3006002d173eSKris Buschelman    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3007209238afSKris Buschelman 
3008209238afSKris Buschelman    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3009209238afSKris Buschelman    and MATMPIBAIJ otherwise.
3010209238afSKris Buschelman 
3011209238afSKris Buschelman    Options Database Keys:
3012209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3013209238afSKris Buschelman 
3014209238afSKris Buschelman   Level: beginner
3015209238afSKris Buschelman 
301669b1f4b7SBarry Smith .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3017209238afSKris Buschelman M*/
3018209238afSKris Buschelman 
3019273d9f13SBarry Smith /*@C
3020aac34f13SBarry Smith    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3021273d9f13SBarry Smith    (block compressed row).  For good matrix assembly performance
3022273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
3023273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3024273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
3025273d9f13SBarry Smith 
3026273d9f13SBarry Smith    Collective on Mat
3027273d9f13SBarry Smith 
3028273d9f13SBarry Smith    Input Parameters:
30291c4f3114SJed Brown +  B - the matrix
3030bb7ae925SBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3031bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3032273d9f13SBarry Smith .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3033273d9f13SBarry Smith            submatrix  (same for all local rows)
3034273d9f13SBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
3035273d9f13SBarry Smith            of the diagonal portion of the local submatrix (possibly different for each block
30360298fd71SBarry Smith            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
303795742e49SBarry Smith            set it even if it is zero.
3038273d9f13SBarry Smith .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3039273d9f13SBarry Smith            submatrix (same for all local rows).
3040273d9f13SBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
3041273d9f13SBarry Smith            off-diagonal portion of the local submatrix (possibly different for
30420298fd71SBarry Smith            each block row) or NULL.
3043273d9f13SBarry Smith 
304449a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
3045273d9f13SBarry Smith 
3046273d9f13SBarry Smith    Options Database Keys:
30478c07d4e3SBarry Smith +   -mat_block_size - size of the blocks to use
30488c07d4e3SBarry Smith -   -mat_use_hash_table <fact>
3049273d9f13SBarry Smith 
3050273d9f13SBarry Smith    Notes:
3051273d9f13SBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3052273d9f13SBarry Smith    then it must be used on all processors that share the object for that argument.
3053273d9f13SBarry Smith 
3054273d9f13SBarry Smith    Storage Information:
3055273d9f13SBarry Smith    For a square global matrix we define each processor's diagonal portion
3056273d9f13SBarry Smith    to be its local rows and the corresponding columns (a square submatrix);
3057273d9f13SBarry Smith    each processor's off-diagonal portion encompasses the remainder of the
3058273d9f13SBarry Smith    local matrix (a rectangular submatrix).
3059273d9f13SBarry Smith 
3060273d9f13SBarry Smith    The user can specify preallocated storage for the diagonal part of
3061273d9f13SBarry Smith    the local submatrix with either d_nz or d_nnz (not both).  Set
30620298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3063273d9f13SBarry Smith    memory allocation.  Likewise, specify preallocated storage for the
3064273d9f13SBarry Smith    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3065273d9f13SBarry Smith 
3066273d9f13SBarry Smith    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3067273d9f13SBarry Smith    the figure below we depict these three local rows and all columns (0-11).
3068273d9f13SBarry Smith 
3069273d9f13SBarry Smith .vb
3070273d9f13SBarry Smith            0 1 2 3 4 5 6 7 8 9 10 11
3071a4b1a0f6SJed Brown           --------------------------
3072273d9f13SBarry Smith    row 3  |o o o d d d o o o o  o  o
3073273d9f13SBarry Smith    row 4  |o o o d d d o o o o  o  o
3074273d9f13SBarry Smith    row 5  |o o o d d d o o o o  o  o
3075a4b1a0f6SJed Brown           --------------------------
3076273d9f13SBarry Smith .ve
3077273d9f13SBarry Smith 
3078273d9f13SBarry Smith    Thus, any entries in the d locations are stored in the d (diagonal)
3079273d9f13SBarry Smith    submatrix, and any entries in the o locations are stored in the
3080273d9f13SBarry Smith    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3081273d9f13SBarry Smith    stored simply in the MATSEQBAIJ format for compressed row storage.
3082273d9f13SBarry Smith 
3083273d9f13SBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3084273d9f13SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3085273d9f13SBarry Smith    In general, for PDE problems in which most nonzeros are near the diagonal,
3086273d9f13SBarry Smith    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3087273d9f13SBarry Smith    or you will get TERRIBLE performance; see the users' manual chapter on
3088273d9f13SBarry Smith    matrices.
3089273d9f13SBarry Smith 
3090aa95bbe8SBarry Smith    You can call MatGetInfo() to get information on how effective the preallocation was;
3091aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3092aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
3093aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
3094aa95bbe8SBarry Smith 
3095273d9f13SBarry Smith    Level: intermediate
3096273d9f13SBarry Smith 
3097ab978733SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3098273d9f13SBarry Smith @*/
30997087cfbeSBarry Smith PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3100273d9f13SBarry Smith {
31014ac538c5SBarry Smith   PetscErrorCode ierr;
3102273d9f13SBarry Smith 
3103273d9f13SBarry Smith   PetscFunctionBegin;
31046ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
31056ba663aaSJed Brown   PetscValidType(B,1);
31066ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
31074ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3108273d9f13SBarry Smith   PetscFunctionReturn(0);
3109273d9f13SBarry Smith }
3110273d9f13SBarry Smith 
311179bdfe76SSatish Balay /*@C
311269b1f4b7SBarry Smith    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
311379bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
311479bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
311579bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
311679bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
311779bdfe76SSatish Balay 
3118d083f849SBarry Smith    Collective
3119db81eaa0SLois Curfman McInnes 
312079bdfe76SSatish Balay    Input Parameters:
3121db81eaa0SLois Curfman McInnes +  comm - MPI communicator
3122bb7ae925SBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3123bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
312479bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
312592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
312692e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
312792e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
312892e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
312992e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
3130be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3131be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
313247a75d0bSBarry Smith .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
313379bdfe76SSatish Balay            submatrix  (same for all local rows)
313447a75d0bSBarry Smith .  d_nnz - array containing the number of nonzero blocks in the various block rows
313592e8d321SLois Curfman McInnes            of the diagonal portion of the local submatrix (possibly different for each block
31360298fd71SBarry Smith            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
313795742e49SBarry Smith            and set it even if it is zero.
313847a75d0bSBarry Smith .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
313979bdfe76SSatish Balay            submatrix (same for all local rows).
314047a75d0bSBarry Smith -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
314192e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
31420298fd71SBarry Smith            each block row) or NULL.
314379bdfe76SSatish Balay 
314479bdfe76SSatish Balay    Output Parameter:
314579bdfe76SSatish Balay .  A - the matrix
314679bdfe76SSatish Balay 
3147db81eaa0SLois Curfman McInnes    Options Database Keys:
31488c07d4e3SBarry Smith +   -mat_block_size - size of the blocks to use
31498c07d4e3SBarry Smith -   -mat_use_hash_table <fact>
31503ffaccefSLois Curfman McInnes 
3151175b88e8SBarry Smith    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3152f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3153175b88e8SBarry Smith    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3154175b88e8SBarry Smith 
3155b259b22eSLois Curfman McInnes    Notes:
315649a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
315749a6f317SBarry Smith 
315847a75d0bSBarry Smith    A nonzero block is any block that has 1 or more nonzeros in it
315947a75d0bSBarry Smith 
316079bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
316179bdfe76SSatish Balay    (possibly both).
316279bdfe76SSatish Balay 
3163be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3164be79a94dSBarry Smith    then it must be used on all processors that share the object for that argument.
3165be79a94dSBarry Smith 
316679bdfe76SSatish Balay    Storage Information:
316779bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
316879bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
316979bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
317079bdfe76SSatish Balay    local matrix (a rectangular submatrix).
317179bdfe76SSatish Balay 
317279bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
317379bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
31740298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
317579bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
317679bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
317779bdfe76SSatish Balay 
317879bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
317979bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
318079bdfe76SSatish Balay 
3181db81eaa0SLois Curfman McInnes .vb
3182db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
3183a4b1a0f6SJed Brown           --------------------------
3184db81eaa0SLois Curfman McInnes    row 3  |o o o d d d o o o o  o  o
3185db81eaa0SLois Curfman McInnes    row 4  |o o o d d d o o o o  o  o
3186db81eaa0SLois Curfman McInnes    row 5  |o o o d d d o o o o  o  o
3187a4b1a0f6SJed Brown           --------------------------
3188db81eaa0SLois Curfman McInnes .ve
318979bdfe76SSatish Balay 
319079bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
319179bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
319279bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
319357b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
319479bdfe76SSatish Balay 
3195d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3196d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
319779bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
319892e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
319992e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
32006da5968aSLois Curfman McInnes    matrices.
320179bdfe76SSatish Balay 
3202027ccd11SLois Curfman McInnes    Level: intermediate
3203027ccd11SLois Curfman McInnes 
320469b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
320579bdfe76SSatish Balay @*/
320669b1f4b7SBarry Smith PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
320779bdfe76SSatish Balay {
32086849ba73SBarry Smith   PetscErrorCode ierr;
3209b24ad042SBarry Smith   PetscMPIInt    size;
321079bdfe76SSatish Balay 
3211d64ed03dSBarry Smith   PetscFunctionBegin;
3212f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3213f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3214ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3215273d9f13SBarry Smith   if (size > 1) {
3216273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3217273d9f13SBarry Smith     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3218273d9f13SBarry Smith   } else {
3219273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3220273d9f13SBarry Smith     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
32213914022bSBarry Smith   }
32223a40ed3dSBarry Smith   PetscFunctionReturn(0);
322379bdfe76SSatish Balay }
3224026e39d0SSatish Balay 
32256849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
32260ac07820SSatish Balay {
32270ac07820SSatish Balay   Mat            mat;
32280ac07820SSatish Balay   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3229dfbe8321SBarry Smith   PetscErrorCode ierr;
3230b24ad042SBarry Smith   PetscInt       len=0;
32310ac07820SSatish Balay 
3232d64ed03dSBarry Smith   PetscFunctionBegin;
3233f4259b30SLisandro Dalcin   *newmat = NULL;
3234ce94432eSBarry Smith   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3235d0f46423SBarry Smith   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
32367adad957SLisandro Dalcin   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
32377fff6886SHong Zhang 
3238d5f3da31SBarry Smith   mat->factortype   = matin->factortype;
3239273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
32400ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
32417fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
32427fff6886SHong Zhang 
3243273d9f13SBarry Smith   a             = (Mat_MPIBAIJ*)mat->data;
3244d0f46423SBarry Smith   mat->rmap->bs = matin->rmap->bs;
32450ac07820SSatish Balay   a->bs2        = oldmat->bs2;
32460ac07820SSatish Balay   a->mbs        = oldmat->mbs;
32470ac07820SSatish Balay   a->nbs        = oldmat->nbs;
32480ac07820SSatish Balay   a->Mbs        = oldmat->Mbs;
32490ac07820SSatish Balay   a->Nbs        = oldmat->Nbs;
32500ac07820SSatish Balay 
32511e1e43feSBarry Smith   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
32521e1e43feSBarry Smith   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3253899cda47SBarry Smith 
32540ac07820SSatish Balay   a->size         = oldmat->size;
32550ac07820SSatish Balay   a->rank         = oldmat->rank;
3256aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
3257aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
3258f4259b30SLisandro Dalcin   a->rowindices   = NULL;
3259f4259b30SLisandro Dalcin   a->rowvalues    = NULL;
32600ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
3261f4259b30SLisandro Dalcin   a->barray       = NULL;
3262899cda47SBarry Smith   a->rstartbs     = oldmat->rstartbs;
3263899cda47SBarry Smith   a->rendbs       = oldmat->rendbs;
3264899cda47SBarry Smith   a->cstartbs     = oldmat->cstartbs;
3265899cda47SBarry Smith   a->cendbs       = oldmat->cendbs;
32660ac07820SSatish Balay 
3267133cdb44SSatish Balay   /* hash table stuff */
3268f4259b30SLisandro Dalcin   a->ht           = NULL;
3269f4259b30SLisandro Dalcin   a->hd           = NULL;
3270133cdb44SSatish Balay   a->ht_size      = 0;
3271133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
327225fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
3273133cdb44SSatish Balay   a->ht_total_ct  = 0;
3274133cdb44SSatish Balay   a->ht_insert_ct = 0;
3275133cdb44SSatish Balay 
3276580bdb30SBarry Smith   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
32770ac07820SSatish Balay   if (oldmat->colmap) {
3278aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
32790f5bd95cSBarry Smith     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
328048e59246SSatish Balay #else
3281854ce69bSBarry Smith     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
32823bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3283580bdb30SBarry Smith     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
328448e59246SSatish Balay #endif
3285f4259b30SLisandro Dalcin   } else a->colmap = NULL;
32864beb1cfeSHong Zhang 
32870ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3288785e854fSJed Brown     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
32893bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3290580bdb30SBarry Smith     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
3291f4259b30SLisandro Dalcin   } else a->garray = NULL;
32920ac07820SSatish Balay 
3293ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
32940ac07820SSatish Balay   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
32953bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
32960ac07820SSatish Balay   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
32973bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
32987fff6886SHong Zhang 
32992e8a6d31SBarry Smith   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
33003bb1ff40SBarry Smith   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
33012e8a6d31SBarry Smith   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
33023bb1ff40SBarry Smith   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3303140e18c1SBarry Smith   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
33040ac07820SSatish Balay   *newmat = mat;
33053a40ed3dSBarry Smith   PetscFunctionReturn(0);
33060ac07820SSatish Balay }
330757b952d6SSatish Balay 
3308618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
3309b51a4376SLisandro Dalcin PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
3310b51a4376SLisandro Dalcin {
3311b51a4376SLisandro Dalcin   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3312b51a4376SLisandro Dalcin   PetscInt       *rowidxs,*colidxs,rs,cs,ce;
3313b51a4376SLisandro Dalcin   PetscScalar    *matvals;
3314b51a4376SLisandro Dalcin   PetscErrorCode ierr;
3315b51a4376SLisandro Dalcin 
3316b51a4376SLisandro Dalcin   PetscFunctionBegin;
3317b51a4376SLisandro Dalcin   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3318b51a4376SLisandro Dalcin 
3319b51a4376SLisandro Dalcin   /* read in matrix header */
3320b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3321b51a4376SLisandro Dalcin   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3322b51a4376SLisandro Dalcin   M  = header[1]; N = header[2]; nz = header[3];
3323b51a4376SLisandro Dalcin   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3324b51a4376SLisandro Dalcin   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3325b51a4376SLisandro Dalcin   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
3326b51a4376SLisandro Dalcin 
3327b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
3328b51a4376SLisandro Dalcin   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3329618cc2edSLisandro Dalcin   /* set local sizes if not set already */
3330618cc2edSLisandro Dalcin   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3331618cc2edSLisandro Dalcin   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3332b51a4376SLisandro Dalcin   /* set global sizes if not set already */
3333b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3334b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
3335b51a4376SLisandro Dalcin   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3336b51a4376SLisandro Dalcin   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3337b51a4376SLisandro Dalcin 
3338b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
3339b51a4376SLisandro Dalcin   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3340b51a4376SLisandro Dalcin   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3341b51a4376SLisandro Dalcin   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3342b51a4376SLisandro Dalcin   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
33431e1ea65dSPierre Jolivet   ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);CHKERRQ(ierr);
33441e1ea65dSPierre Jolivet   ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);CHKERRQ(ierr);
3345b51a4376SLisandro Dalcin   mbs = m/bs; nbs = n/bs;
3346b51a4376SLisandro Dalcin 
3347b51a4376SLisandro Dalcin   /* read in row lengths and build row indices */
3348b51a4376SLisandro Dalcin   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3349b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3350b51a4376SLisandro Dalcin   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3351820f2d46SBarry Smith   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
3352b51a4376SLisandro Dalcin   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3353b51a4376SLisandro Dalcin 
3354b51a4376SLisandro Dalcin   /* read in column indices and matrix values */
3355b51a4376SLisandro Dalcin   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3356b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3357b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3358b51a4376SLisandro Dalcin 
3359b51a4376SLisandro Dalcin   { /* preallocate matrix storage */
3360b51a4376SLisandro Dalcin     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3361b51a4376SLisandro Dalcin     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3362618cc2edSLisandro Dalcin     PetscBool  sbaij,done;
3363b51a4376SLisandro Dalcin     PetscInt   *d_nnz,*o_nnz;
3364b51a4376SLisandro Dalcin 
3365b51a4376SLisandro Dalcin     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3366b51a4376SLisandro Dalcin     ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
3367b51a4376SLisandro Dalcin     ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
3368618cc2edSLisandro Dalcin     ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
3369b51a4376SLisandro Dalcin     for (i=0; i<mbs; i++) {
3370b51a4376SLisandro Dalcin       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3371b51a4376SLisandro Dalcin       ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
3372618cc2edSLisandro Dalcin       for (k=0; k<bs; k++) {
3373618cc2edSLisandro Dalcin         PetscInt row = bs*i + k;
3374618cc2edSLisandro Dalcin         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3375618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3376618cc2edSLisandro Dalcin           if (!sbaij || col >= row) {
3377618cc2edSLisandro Dalcin             if (col >= cs && col < ce) {
3378618cc2edSLisandro Dalcin               if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
3379b51a4376SLisandro Dalcin             } else {
3380618cc2edSLisandro Dalcin               ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
3381b51a4376SLisandro Dalcin               if (done) o_nnz[i]++;
3382b51a4376SLisandro Dalcin             }
3383b51a4376SLisandro Dalcin           }
3384618cc2edSLisandro Dalcin         }
3385618cc2edSLisandro Dalcin       }
3386618cc2edSLisandro Dalcin     }
3387b51a4376SLisandro Dalcin     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3388b51a4376SLisandro Dalcin     ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
3389b51a4376SLisandro Dalcin     ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3390618cc2edSLisandro Dalcin     ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3391b51a4376SLisandro Dalcin     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3392b51a4376SLisandro Dalcin   }
3393b51a4376SLisandro Dalcin 
3394b51a4376SLisandro Dalcin   /* store matrix values */
3395b51a4376SLisandro Dalcin   for (i=0; i<m; i++) {
3396b51a4376SLisandro Dalcin     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
3397618cc2edSLisandro Dalcin     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3398b51a4376SLisandro Dalcin   }
3399b51a4376SLisandro Dalcin 
3400b51a4376SLisandro Dalcin   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3401b51a4376SLisandro Dalcin   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3402b51a4376SLisandro Dalcin   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3403b51a4376SLisandro Dalcin   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3404b51a4376SLisandro Dalcin   PetscFunctionReturn(0);
3405b51a4376SLisandro Dalcin }
3406b51a4376SLisandro Dalcin 
3407b51a4376SLisandro Dalcin PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
34084683f7a4SShri Abhyankar {
34094683f7a4SShri Abhyankar   PetscErrorCode ierr;
34107f489da9SVaclav Hapla   PetscBool      isbinary;
34114683f7a4SShri Abhyankar 
34124683f7a4SShri Abhyankar   PetscFunctionBegin;
34137f489da9SVaclav Hapla   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3414b51a4376SLisandro Dalcin   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3415b51a4376SLisandro Dalcin   ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
34164683f7a4SShri Abhyankar   PetscFunctionReturn(0);
34174683f7a4SShri Abhyankar }
34184683f7a4SShri Abhyankar 
3419133cdb44SSatish Balay /*@
3420133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3421133cdb44SSatish Balay 
3422133cdb44SSatish Balay    Input Parameters:
3423a2b725a8SWilliam Gropp +  mat  - the matrix
3424a2b725a8SWilliam Gropp -  fact - factor
3425133cdb44SSatish Balay 
3426c5eb9154SBarry Smith    Not Collective, each process can use a different factor
3427fee21e36SBarry Smith 
34288c890885SBarry Smith    Level: advanced
34298c890885SBarry Smith 
3430133cdb44SSatish Balay   Notes:
34318c07d4e3SBarry Smith    This can also be set by the command line option: -mat_use_hash_table <fact>
3432133cdb44SSatish Balay 
3433133cdb44SSatish Balay .seealso: MatSetOption()
3434133cdb44SSatish Balay @*/
34357087cfbeSBarry Smith PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3436133cdb44SSatish Balay {
34374ac538c5SBarry Smith   PetscErrorCode ierr;
34385bf65638SKris Buschelman 
34395bf65638SKris Buschelman   PetscFunctionBegin;
34404ac538c5SBarry Smith   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
34415bf65638SKris Buschelman   PetscFunctionReturn(0);
34425bf65638SKris Buschelman }
34435bf65638SKris Buschelman 
34447087cfbeSBarry Smith PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
34455bf65638SKris Buschelman {
344625fdafccSSatish Balay   Mat_MPIBAIJ *baij;
3447133cdb44SSatish Balay 
3448133cdb44SSatish Balay   PetscFunctionBegin;
3449133cdb44SSatish Balay   baij          = (Mat_MPIBAIJ*)mat->data;
3450133cdb44SSatish Balay   baij->ht_fact = fact;
3451133cdb44SSatish Balay   PetscFunctionReturn(0);
3452133cdb44SSatish Balay }
3453f2a5309cSSatish Balay 
34549230625dSJed Brown PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3455f2a5309cSSatish Balay {
3456f2a5309cSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3457ab4d48faSStefano Zampini   PetscBool      flg;
3458ab4d48faSStefano Zampini   PetscErrorCode ierr;
34595fd66863SKarl Rupp 
3460f2a5309cSSatish Balay   PetscFunctionBegin;
3461ab4d48faSStefano Zampini   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3462ab4d48faSStefano Zampini   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
346321e72a00SBarry Smith   if (Ad)     *Ad     = a->A;
346421e72a00SBarry Smith   if (Ao)     *Ao     = a->B;
346521e72a00SBarry Smith   if (colmap) *colmap = a->garray;
3466f2a5309cSSatish Balay   PetscFunctionReturn(0);
3467f2a5309cSSatish Balay }
346885535b8eSBarry Smith 
/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
347285535b8eSBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
347385535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
347485535b8eSBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
347585535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
347685535b8eSBarry Smith #endif
347785535b8eSBarry Smith 
347885535b8eSBarry Smith /*@C
347985535b8eSBarry Smith   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
348085535b8eSBarry Smith 
348185535b8eSBarry Smith   Collective on Mat
348285535b8eSBarry Smith 
348385535b8eSBarry Smith   Input Parameters:
348485535b8eSBarry Smith + mat - the matrix
348585535b8eSBarry Smith . min - number of input rows
348685535b8eSBarry Smith . im - input rows
348785535b8eSBarry Smith . nin - number of input columns
348885535b8eSBarry Smith . in - input columns
348985535b8eSBarry Smith . v - numerical values input
349085535b8eSBarry Smith - addvin - INSERT_VALUES or ADD_VALUES
349185535b8eSBarry Smith 
349295452b02SPatrick Sanan   Notes:
349395452b02SPatrick Sanan     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
349485535b8eSBarry Smith 
349585535b8eSBarry Smith   Level: advanced
349685535b8eSBarry Smith 
349785535b8eSBarry Smith .seealso:   MatSetValuesBlocked()
349885535b8eSBarry Smith @*/
349985535b8eSBarry Smith PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
350085535b8eSBarry Smith {
350185535b8eSBarry Smith   /* convert input arguments to C version */
350285535b8eSBarry Smith   Mat        mat  = *matin;
350385535b8eSBarry Smith   PetscInt   m    = *min, n = *nin;
350485535b8eSBarry Smith   InsertMode addv = *addvin;
350585535b8eSBarry Smith 
350685535b8eSBarry Smith   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
350785535b8eSBarry Smith   const MatScalar *value;
350885535b8eSBarry Smith   MatScalar       *barray     = baij->barray;
3509ace3abfcSBarry Smith   PetscBool       roworiented = baij->roworiented;
351085535b8eSBarry Smith   PetscErrorCode  ierr;
351185535b8eSBarry Smith   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
351285535b8eSBarry Smith   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3513d0f46423SBarry Smith   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
351485535b8eSBarry Smith 
351585535b8eSBarry Smith   PetscFunctionBegin;
351685535b8eSBarry Smith   /* tasks normally handled by MatSetValuesBlocked() */
351726fbe8dcSKarl Rupp   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
351876bd3646SJed Brown   else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
351976bd3646SJed Brown   if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
352085535b8eSBarry Smith   if (mat->assembled) {
352185535b8eSBarry Smith     mat->was_assembled = PETSC_TRUE;
352285535b8eSBarry Smith     mat->assembled     = PETSC_FALSE;
352385535b8eSBarry Smith   }
352485535b8eSBarry Smith   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
352585535b8eSBarry Smith 
352685535b8eSBarry Smith   if (!barray) {
3527785e854fSJed Brown     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
352885535b8eSBarry Smith     baij->barray = barray;
352985535b8eSBarry Smith   }
353085535b8eSBarry Smith 
353126fbe8dcSKarl Rupp   if (roworiented) stepval = (n-1)*bs;
353226fbe8dcSKarl Rupp   else stepval = (m-1)*bs;
353326fbe8dcSKarl Rupp 
353485535b8eSBarry Smith   for (i=0; i<m; i++) {
353585535b8eSBarry Smith     if (im[i] < 0) continue;
3536cf9c20a2SJed Brown     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
353785535b8eSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
353885535b8eSBarry Smith       row = im[i] - rstart;
353985535b8eSBarry Smith       for (j=0; j<n; j++) {
354085535b8eSBarry Smith         /* If NumCol = 1 then a copy is not required */
354185535b8eSBarry Smith         if ((roworiented) && (n == 1)) {
354285535b8eSBarry Smith           barray = (MatScalar*)v + i*bs2;
354385535b8eSBarry Smith         } else if ((!roworiented) && (m == 1)) {
354485535b8eSBarry Smith           barray = (MatScalar*)v + j*bs2;
354585535b8eSBarry Smith         } else { /* Here a copy is required */
354685535b8eSBarry Smith           if (roworiented) {
354785535b8eSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
354885535b8eSBarry Smith           } else {
354985535b8eSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
355085535b8eSBarry Smith           }
355185535b8eSBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
355285535b8eSBarry Smith             for (jj=0; jj<bs; jj++) {
355385535b8eSBarry Smith               *barray++ = *value++;
355485535b8eSBarry Smith             }
355585535b8eSBarry Smith           }
355685535b8eSBarry Smith           barray -=bs2;
355785535b8eSBarry Smith         }
355885535b8eSBarry Smith 
355985535b8eSBarry Smith         if (in[j] >= cstart && in[j] < cend) {
356085535b8eSBarry Smith           col  = in[j] - cstart;
35618ab52850SBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
356226fbe8dcSKarl Rupp         } else if (in[j] < 0) continue;
3563cf9c20a2SJed Brown         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
356485535b8eSBarry Smith         else {
356585535b8eSBarry Smith           if (mat->was_assembled) {
356685535b8eSBarry Smith             if (!baij->colmap) {
3567ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
356885535b8eSBarry Smith             }
356985535b8eSBarry Smith 
357085535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
357185535b8eSBarry Smith #if defined(PETSC_USE_CTABLE)
357285535b8eSBarry Smith             { PetscInt data;
357385535b8eSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3574e32f2f54SBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
357585535b8eSBarry Smith             }
357685535b8eSBarry Smith #else
3577e32f2f54SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
357885535b8eSBarry Smith #endif
357985535b8eSBarry Smith #endif
358085535b8eSBarry Smith #if defined(PETSC_USE_CTABLE)
358185535b8eSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
358285535b8eSBarry Smith             col  = (col - 1)/bs;
358385535b8eSBarry Smith #else
358485535b8eSBarry Smith             col = (baij->colmap[in[j]] - 1)/bs;
358585535b8eSBarry Smith #endif
358685535b8eSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3587ab9863d7SBarry Smith               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
358885535b8eSBarry Smith               col  =  in[j];
358985535b8eSBarry Smith             }
359026fbe8dcSKarl Rupp           } else col = in[j];
35918ab52850SBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
359285535b8eSBarry Smith         }
359385535b8eSBarry Smith       }
359485535b8eSBarry Smith     } else {
359585535b8eSBarry Smith       if (!baij->donotstash) {
359685535b8eSBarry Smith         if (roworiented) {
359785535b8eSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
359885535b8eSBarry Smith         } else {
359985535b8eSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
360085535b8eSBarry Smith         }
360185535b8eSBarry Smith       }
360285535b8eSBarry Smith     }
360385535b8eSBarry Smith   }
360485535b8eSBarry Smith 
360585535b8eSBarry Smith   /* task normally handled by MatSetValuesBlocked() */
360685535b8eSBarry Smith   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
360785535b8eSBarry Smith   PetscFunctionReturn(0);
360885535b8eSBarry Smith }
3609dfb205c3SBarry Smith 
3610dfb205c3SBarry Smith /*@
3611483a2f95SBarry Smith      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3612dfb205c3SBarry Smith          CSR format the local rows.
3613dfb205c3SBarry Smith 
3614d083f849SBarry Smith    Collective
3615dfb205c3SBarry Smith 
3616dfb205c3SBarry Smith    Input Parameters:
3617dfb205c3SBarry Smith +  comm - MPI communicator
3618dfb205c3SBarry Smith .  bs - the block size, only a block size of 1 is supported
3619dfb205c3SBarry Smith .  m - number of local rows (Cannot be PETSC_DECIDE)
3620dfb205c3SBarry Smith .  n - This value should be the same as the local size used in creating the
3621dfb205c3SBarry Smith        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3622dfb205c3SBarry Smith        calculated if N is given) For square matrices n is almost always m.
3623dfb205c3SBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3624dfb205c3SBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3625483a2f95SBarry Smith .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3626dfb205c3SBarry Smith .   j - column indices
3627dfb205c3SBarry Smith -   a - matrix values
3628dfb205c3SBarry Smith 
3629dfb205c3SBarry Smith    Output Parameter:
3630dfb205c3SBarry Smith .   mat - the matrix
3631dfb205c3SBarry Smith 
3632dfb205c3SBarry Smith    Level: intermediate
3633dfb205c3SBarry Smith 
3634dfb205c3SBarry Smith    Notes:
3635dfb205c3SBarry Smith        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3636dfb205c3SBarry Smith      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3637dfb205c3SBarry Smith      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3638dfb205c3SBarry Smith 
36393adadaf3SJed Brown      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
36403adadaf3SJed Brown      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
36413adadaf3SJed Brown      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
36423adadaf3SJed Brown      with column-major ordering within blocks.
36433adadaf3SJed Brown 
3644dfb205c3SBarry Smith        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3645dfb205c3SBarry Smith 
3646dfb205c3SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
364769b1f4b7SBarry Smith           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3648dfb205c3SBarry Smith @*/
36497087cfbeSBarry Smith PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3650dfb205c3SBarry Smith {
3651dfb205c3SBarry Smith   PetscErrorCode ierr;
3652dfb205c3SBarry Smith 
3653dfb205c3SBarry Smith   PetscFunctionBegin;
3654f23aa3ddSBarry Smith   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3655dfb205c3SBarry Smith   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3656dfb205c3SBarry Smith   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3657dfb205c3SBarry Smith   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
36589a43d2d5SJed Brown   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
365927f91139SJed Brown   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
366027f91139SJed Brown   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3661d47bf9aaSJed Brown   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3662dfb205c3SBarry Smith   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3663d47bf9aaSJed Brown   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3664dfb205c3SBarry Smith   PetscFunctionReturn(0);
3665dfb205c3SBarry Smith }
3666e561ad89SHong Zhang 
3667bd153df0SHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3668e561ad89SHong Zhang {
3669e561ad89SHong Zhang   PetscErrorCode ierr;
3670bd153df0SHong Zhang   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3671bd153df0SHong Zhang   PetscInt       *indx;
3672bd153df0SHong Zhang   PetscScalar    *values;
3673e561ad89SHong Zhang 
3674e561ad89SHong Zhang   PetscFunctionBegin;
3675e561ad89SHong Zhang   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3676bd153df0SHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3677bd153df0SHong Zhang     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
36782c6ba4edSHong Zhang     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3679bd153df0SHong Zhang     PetscInt       *bindx,rmax=a->rmax,j;
368077f764caSHong Zhang     PetscMPIInt    rank,size;
3681e561ad89SHong Zhang 
3682bd153df0SHong Zhang     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3683bd153df0SHong Zhang     mbs = m/bs; Nbs = N/cbs;
3684bd153df0SHong Zhang     if (n == PETSC_DECIDE) {
36851e1ea65dSPierre Jolivet       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr);
3686bd153df0SHong Zhang     }
3687da91a574SPierre Jolivet     nbs = n/cbs;
3688e561ad89SHong Zhang 
3689647a6520SHong Zhang     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
369077f764caSHong Zhang     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
369177f764caSHong Zhang 
3692ffc4695bSBarry Smith     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3693ffc4695bSBarry Smith     ierr = MPI_Comm_rank(comm,&size);CHKERRMPI(ierr);
369477f764caSHong Zhang     if (rank == size-1) {
369577f764caSHong Zhang       /* Check sum(nbs) = Nbs */
36962c6ba4edSHong Zhang       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
369777f764caSHong Zhang     }
369877f764caSHong Zhang 
369977f764caSHong Zhang     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3700bd153df0SHong Zhang     for (i=0; i<mbs; i++) {
3701647a6520SHong Zhang       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3702647a6520SHong Zhang       nnz = nnz/bs;
3703647a6520SHong Zhang       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3704647a6520SHong Zhang       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3705647a6520SHong Zhang       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3706e561ad89SHong Zhang     }
3707647a6520SHong Zhang     ierr = PetscFree(bindx);CHKERRQ(ierr);
3708e561ad89SHong Zhang 
3709e561ad89SHong Zhang     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
371077f764caSHong Zhang     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3711e561ad89SHong Zhang     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
37128761c3d6SHong Zhang     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
37138761c3d6SHong Zhang     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3714e561ad89SHong Zhang     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3715e561ad89SHong Zhang     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3716f2e2784eSPierre Jolivet     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3717e561ad89SHong Zhang   }
3718e561ad89SHong Zhang 
3719bd153df0SHong Zhang   /* numeric phase */
3720647a6520SHong Zhang   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3721bd153df0SHong Zhang   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3722e561ad89SHong Zhang 
3723e561ad89SHong Zhang   for (i=0; i<m; i++) {
3724e561ad89SHong Zhang     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3725e561ad89SHong Zhang     Ii   = i + rstart;
3726bd153df0SHong Zhang     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3727e561ad89SHong Zhang     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3728e561ad89SHong Zhang   }
3729bd153df0SHong Zhang   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3730bd153df0SHong Zhang   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3731e561ad89SHong Zhang   PetscFunctionReturn(0);
3732e561ad89SHong Zhang }
3733