xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision cf9c20a2d58da010f7c4712defbcdf61cc8f72b5)
1c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2c5d9258eSSatish Balay 
3b51a4376SLisandro Dalcin #include <petsc/private/hashseti.h>
4c6db04a5SJed Brown #include <petscblaslapack.h>
565a92638SMatthew G. Knepley #include <petscsf.h>
679bdfe76SSatish Balay 
77ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
87ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
97ea3e4caSstefano_zampini #endif
107ea3e4caSstefano_zampini 
11985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
127843d17aSBarry Smith {
137843d17aSBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
14dfbe8321SBarry Smith   PetscErrorCode ierr;
15985db425SBarry Smith   PetscInt       i,*idxb = 0;
1687828ca2SBarry Smith   PetscScalar    *va,*vb;
177843d17aSBarry Smith   Vec            vtmp;
187843d17aSBarry Smith 
197843d17aSBarry Smith   PetscFunctionBegin;
20985db425SBarry Smith   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
211ebc52fbSHong Zhang   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
22985db425SBarry Smith   if (idx) {
2326fbe8dcSKarl Rupp     for (i=0; i<A->rmap->n; i++) {
2426fbe8dcSKarl Rupp       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2526fbe8dcSKarl Rupp     }
26985db425SBarry Smith   }
277843d17aSBarry Smith 
28d0f46423SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
29785e854fSJed Brown   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
30985db425SBarry Smith   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
311ebc52fbSHong Zhang   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
327843d17aSBarry Smith 
33d0f46423SBarry Smith   for (i=0; i<A->rmap->n; i++) {
3426fbe8dcSKarl Rupp     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3526fbe8dcSKarl Rupp       va[i] = vb[i];
3626fbe8dcSKarl Rupp       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
3726fbe8dcSKarl Rupp     }
387843d17aSBarry Smith   }
397843d17aSBarry Smith 
401ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
411ebc52fbSHong Zhang   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42c31cb41cSBarry Smith   ierr = PetscFree(idxb);CHKERRQ(ierr);
436bf464f9SBarry Smith   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
447843d17aSBarry Smith   PetscFunctionReturn(0);
457843d17aSBarry Smith }
467843d17aSBarry Smith 
477087cfbeSBarry Smith PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
487fc3c18eSBarry Smith {
497fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50dfbe8321SBarry Smith   PetscErrorCode ierr;
517fc3c18eSBarry Smith 
527fc3c18eSBarry Smith   PetscFunctionBegin;
537fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
547fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
557fc3c18eSBarry Smith   PetscFunctionReturn(0);
567fc3c18eSBarry Smith }
577fc3c18eSBarry Smith 
587087cfbeSBarry Smith PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
597fc3c18eSBarry Smith {
607fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
61dfbe8321SBarry Smith   PetscErrorCode ierr;
627fc3c18eSBarry Smith 
637fc3c18eSBarry Smith   PetscFunctionBegin;
647fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
657fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
667fc3c18eSBarry Smith   PetscFunctionReturn(0);
677fc3c18eSBarry Smith }
687fc3c18eSBarry Smith 
69537820f0SBarry Smith /*
70537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
7157b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
72e06f6af7SJed Brown    storage of the matrix.  This is done in a non scalable way since the
7357b952d6SSatish Balay    length of colmap equals the global matrix length.
7457b952d6SSatish Balay */
75ab9863d7SBarry Smith PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
7657b952d6SSatish Balay {
7757b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
7857b952d6SSatish Balay   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
796849ba73SBarry Smith   PetscErrorCode ierr;
80d0f46423SBarry Smith   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
8157b952d6SSatish Balay 
82d64ed03dSBarry Smith   PetscFunctionBegin;
83aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
84e23dfa41SBarry Smith   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
8548e59246SSatish Balay   for (i=0; i<nbs; i++) {
863861aac3SJed Brown     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
8748e59246SSatish Balay   }
8848e59246SSatish Balay #else
89580bdb30SBarry Smith   ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
903bb1ff40SBarry Smith   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
91928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
9248e59246SSatish Balay #endif
933a40ed3dSBarry Smith   PetscFunctionReturn(0);
9457b952d6SSatish Balay }
9557b952d6SSatish Balay 
/*
   MatSetValues_SeqBAIJ_A_Private - Inserts or adds one scalar into the
   diagonal sequential part, at point (row,col) given in that part's local
   numbering.  orow/ocol are only used in the error message.

   This is a statement macro, not a function: it reads and writes variables of
   the enclosing function by name (aa, ai, aj, aimax, ailen, a, A, bs, bs2,
   brow, bcol, ridx, cidx, rp, ap, bap, rmax, nrow, low, high, t, _i, N, ierr)
   and defines the label a_noinsert, so it can be expanded at most once per
   function.

   Steps: convert the point indices to block indices and in-block offsets;
   narrow the search window in the block row by bisection until at most 3
   candidates remain, then scan linearly.  If the block column exists the
   entry at offset bs*cidx+ridx of that block is updated.  Otherwise, when
   a->nonew is 1 the value is dropped, when it is -1 an error is raised, and
   in the remaining cases MatSeqXAIJReallocateAIJ() is invoked, later entries
   of the row are shifted up by one block, the new block is zeroed and the
   value stored.  The row length is written back in every case.
*/
96d40312a9SBarry Smith #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
9780c1aa95SSatish Balay   { \
9880c1aa95SSatish Balay     brow = row/bs;  \
9980c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
100ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
10180c1aa95SSatish Balay     bcol = col/bs; \
10280c1aa95SSatish Balay     ridx = row % bs; cidx = col % bs; \
103ab26458aSBarry Smith     low  = 0; high = nrow; \
104ab26458aSBarry Smith     while (high-low > 3) { \
105ab26458aSBarry Smith       t = (low+high)/2; \
106ab26458aSBarry Smith       if (rp[t] > bcol) high = t; \
107ab26458aSBarry Smith       else              low  = t; \
108ab26458aSBarry Smith     } \
109ab26458aSBarry Smith     for (_i=low; _i<high; _i++) { \
11080c1aa95SSatish Balay       if (rp[_i] > bcol) break; \
11180c1aa95SSatish Balay       if (rp[_i] == bcol) { \
11280c1aa95SSatish Balay         bap = ap +  bs2*_i + bs*cidx + ridx; \
113eada6651SSatish Balay         if (addv == ADD_VALUES) *bap += value;  \
114eada6651SSatish Balay         else                    *bap  = value;  \
115ac7a638eSSatish Balay         goto a_noinsert; \
11680c1aa95SSatish Balay       } \
11780c1aa95SSatish Balay     } \
11889280ab3SLois Curfman McInnes     if (a->nonew == 1) goto a_noinsert; \
119d40312a9SBarry Smith     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
120fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
12180c1aa95SSatish Balay     N = nrow++ - 1;  \
12280c1aa95SSatish Balay     /* shift up all the later entries in this row */ \
123580bdb30SBarry Smith     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
124580bdb30SBarry Smith     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
125580bdb30SBarry Smith     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
12680c1aa95SSatish Balay     rp[_i]                      = bcol;  \
12780c1aa95SSatish Balay     ap[bs2*_i + bs*cidx + ridx] = value;  \
128ac7a638eSSatish Balay a_noinsert:; \
12980c1aa95SSatish Balay     ailen[brow] = nrow; \
13080c1aa95SSatish Balay   }
13157b952d6SSatish Balay 
/*
   MatSetValues_SeqBAIJ_B_Private - Same algorithm as
   MatSetValues_SeqBAIJ_A_Private, applied to the off-diagonal sequential part.

   It uses the enclosing function's ba, bi, bj, bimax, bilen, b and B in place
   of the a-prefixed variables, shares the scratch variables (brow, bcol, ridx,
   cidx, rp, ap, bap, rmax, nrow, low, high, t, _i, N, ierr, bs, bs2) and
   defines the label b_noinsert, so it can be expanded at most once per
   function.  orow/ocol are only used in the error message.
*/
132d40312a9SBarry Smith #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
133ac7a638eSSatish Balay   { \
134ac7a638eSSatish Balay     brow = row/bs;  \
135ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
136ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
137ac7a638eSSatish Balay     bcol = col/bs; \
138ac7a638eSSatish Balay     ridx = row % bs; cidx = col % bs; \
139ac7a638eSSatish Balay     low  = 0; high = nrow; \
140ac7a638eSSatish Balay     while (high-low > 3) { \
141ac7a638eSSatish Balay       t = (low+high)/2; \
142ac7a638eSSatish Balay       if (rp[t] > bcol) high = t; \
143ac7a638eSSatish Balay       else              low  = t; \
144ac7a638eSSatish Balay     } \
145ac7a638eSSatish Balay     for (_i=low; _i<high; _i++) { \
146ac7a638eSSatish Balay       if (rp[_i] > bcol) break; \
147ac7a638eSSatish Balay       if (rp[_i] == bcol) { \
148ac7a638eSSatish Balay         bap = ap +  bs2*_i + bs*cidx + ridx; \
149ac7a638eSSatish Balay         if (addv == ADD_VALUES) *bap += value;  \
150ac7a638eSSatish Balay         else                    *bap  = value;  \
151ac7a638eSSatish Balay         goto b_noinsert; \
152ac7a638eSSatish Balay       } \
153ac7a638eSSatish Balay     } \
15489280ab3SLois Curfman McInnes     if (b->nonew == 1) goto b_noinsert; \
155d40312a9SBarry Smith     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
156fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
157ac7a638eSSatish Balay     N = nrow++ - 1;  \
158ac7a638eSSatish Balay     /* shift up all the later entries in this row */ \
159580bdb30SBarry Smith     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
160580bdb30SBarry Smith     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
161580bdb30SBarry Smith     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
162ac7a638eSSatish Balay     rp[_i]                      = bcol;  \
163ac7a638eSSatish Balay     ap[bs2*_i + bs*cidx + ridx] = value;  \
164ac7a638eSSatish Balay b_noinsert:; \
165ac7a638eSSatish Balay     bilen[brow] = nrow; \
166ac7a638eSSatish Balay   }
167ac7a638eSSatish Balay 
/*
   MatSetValues_MPIBAIJ - Sets or adds an m-by-n set of scalar values given by
   point (not block) global indices.

   Input:
     mat  - the matrix
     m,im - number of rows and their global indices; negative rows are skipped
     n,in - number of columns and their global indices; negative columns are skipped
     v    - the values; entry (i,j) is v[i*n+j] when baij->roworiented is set,
            v[i+j*m] otherwise
     addv - ADD_VALUES adds to an existing entry, anything else overwrites it

   Rows in [rstart_orig,rend_orig) are handled locally: columns in
   [cstart_orig,cend_orig) go to the diagonal part through
   MatSetValues_SeqBAIJ_A_Private, all others go to the off-diagonal part
   through MatSetValues_SeqBAIJ_B_Private.  Other rows are stashed unless
   baij->donotstash is set; they raise an error when mat->nooffprocentries is
   set.

   The long list of locals below exists because the two macros (and the
   MatSeqXAIJReallocateAIJ they invoke) use them by name.
*/
168b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
16957b952d6SSatish Balay {
17057b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
17193fea6afSBarry Smith   MatScalar      value;
172ace3abfcSBarry Smith   PetscBool      roworiented = baij->roworiented;
173dfbe8321SBarry Smith   PetscErrorCode ierr;
174b24ad042SBarry Smith   PetscInt       i,j,row,col;
175d0f46423SBarry Smith   PetscInt       rstart_orig=mat->rmap->rstart;
176d0f46423SBarry Smith   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
177d0f46423SBarry Smith   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
17857b952d6SSatish Balay 
179eada6651SSatish Balay   /* Some Variables required in the macro */
18080c1aa95SSatish Balay   Mat         A     = baij->A;
18180c1aa95SSatish Balay   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
182b24ad042SBarry Smith   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
1833eda8832SBarry Smith   MatScalar   *aa   =a->a;
184ac7a638eSSatish Balay 
185ac7a638eSSatish Balay   Mat         B     = baij->B;
186ac7a638eSSatish Balay   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
187b24ad042SBarry Smith   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
1883eda8832SBarry Smith   MatScalar   *ba   =b->a;
189ac7a638eSSatish Balay 
  /* NOTE(review): ii is not referenced directly in this function; presumably it is used inside MatSeqXAIJReallocateAIJ -- confirm before removing */
190b24ad042SBarry Smith   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
191b24ad042SBarry Smith   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
1923eda8832SBarry Smith   MatScalar *ap,*bap;
19380c1aa95SSatish Balay 
194d64ed03dSBarry Smith   PetscFunctionBegin;
19557b952d6SSatish Balay   for (i=0; i<m; i++) {
1965ef9f2a5SBarry Smith     if (im[i] < 0) continue;
197*cf9c20a2SJed Brown     if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
19857b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
19957b952d6SSatish Balay       row = im[i] - rstart_orig;
20057b952d6SSatish Balay       for (j=0; j<n; j++) {
20157b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig) {
20257b952d6SSatish Balay           col = in[j] - cstart_orig;
203db4deed7SKarl Rupp           if (roworiented) value = v[i*n+j];
204db4deed7SKarl Rupp           else             value = v[i+j*m];
205d40312a9SBarry Smith           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
20673959e64SBarry Smith         } else if (in[j] < 0) continue;
207*cf9c20a2SJed Brown         else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
20876bd3646SJed Brown           SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
20976bd3646SJed Brown         } else {
          /* once the matrix has been assembled the global column is translated through colmap (created on first use);
             before that the global column index is passed to the off-diagonal part unchanged */
21057b952d6SSatish Balay           if (mat->was_assembled) {
211905e6a2fSBarry Smith             if (!baij->colmap) {
212ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
213905e6a2fSBarry Smith             }
214aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
2150f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
216bba1ac68SSatish Balay             col  = col - 1;
21748e59246SSatish Balay #else
218bba1ac68SSatish Balay             col = baij->colmap[in[j]/bs] - 1;
21948e59246SSatish Balay #endif
            /* col < 0: the block column is not in colmap; disassemble when the off-diagonal part's nonew flag is 0, otherwise error */
220c9ef50b2SBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
221ab9863d7SBarry Smith               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
2228295de27SSatish Balay               col  =  in[j];
2239bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2249bf004c3SSatish Balay               B    = baij->B;
2259bf004c3SSatish Balay               b    = (Mat_SeqBAIJ*)(B)->data;
2269bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2279bf004c3SSatish Balay               ba   =b->a;
228c9ef50b2SBarry Smith             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
229c9ef50b2SBarry Smith             else col += in[j]%bs;
2308295de27SSatish Balay           } else col = in[j];
231db4deed7SKarl Rupp           if (roworiented) value = v[i*n+j];
232db4deed7SKarl Rupp           else             value = v[i+j*m];
233d40312a9SBarry Smith           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
23490da58bdSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
23557b952d6SSatish Balay         }
23657b952d6SSatish Balay       }
237d64ed03dSBarry Smith     } else {
2384cb17eb5SBarry Smith       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
23990f02eecSBarry Smith       if (!baij->donotstash) {
2405080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
241ff2fd236SBarry Smith         if (roworiented) {
242b400d20cSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
243ff2fd236SBarry Smith         } else {
244b400d20cSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
24557b952d6SSatish Balay         }
24657b952d6SSatish Balay       }
24757b952d6SSatish Balay     }
24890f02eecSBarry Smith   }
2493a40ed3dSBarry Smith   PetscFunctionReturn(0);
25057b952d6SSatish Balay }
25157b952d6SSatish Balay 
2528ab52850SBarry Smith PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
253880c6e6aSBarry Smith {
254880c6e6aSBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
2558ab52850SBarry Smith   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
256880c6e6aSBarry Smith   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
257880c6e6aSBarry Smith   PetscErrorCode    ierr;
2588ab52850SBarry Smith   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
259880c6e6aSBarry Smith   PetscBool         roworiented=a->roworiented;
260880c6e6aSBarry Smith   const PetscScalar *value     = v;
261880c6e6aSBarry Smith   MatScalar         *ap,*aa = a->a,*bap;
262880c6e6aSBarry Smith 
263880c6e6aSBarry Smith   PetscFunctionBegin;
264880c6e6aSBarry Smith   rp   = aj + ai[row];
265880c6e6aSBarry Smith   ap   = aa + bs2*ai[row];
266880c6e6aSBarry Smith   rmax = imax[row];
267880c6e6aSBarry Smith   nrow = ailen[row];
2688ab52850SBarry Smith   value = v;
2698ab52850SBarry Smith   low = 0;
2708ab52850SBarry Smith   high = nrow;
271880c6e6aSBarry Smith   while (high-low > 7) {
272880c6e6aSBarry Smith     t = (low+high)/2;
273880c6e6aSBarry Smith     if (rp[t] > col) high = t;
274880c6e6aSBarry Smith     else             low  = t;
275880c6e6aSBarry Smith   }
276880c6e6aSBarry Smith   for (i=low; i<high; i++) {
277880c6e6aSBarry Smith     if (rp[i] > col) break;
278880c6e6aSBarry Smith     if (rp[i] == col) {
279880c6e6aSBarry Smith       bap = ap +  bs2*i;
280880c6e6aSBarry Smith       if (roworiented) {
281880c6e6aSBarry Smith         if (is == ADD_VALUES) {
2828ab52850SBarry Smith           for (ii=0; ii<bs; ii++) {
283880c6e6aSBarry Smith             for (jj=ii; jj<bs2; jj+=bs) {
284880c6e6aSBarry Smith               bap[jj] += *value++;
285880c6e6aSBarry Smith             }
286880c6e6aSBarry Smith           }
287880c6e6aSBarry Smith         } else {
2888ab52850SBarry Smith           for (ii=0; ii<bs; ii++) {
289880c6e6aSBarry Smith             for (jj=ii; jj<bs2; jj+=bs) {
290880c6e6aSBarry Smith               bap[jj] = *value++;
291880c6e6aSBarry Smith             }
292880c6e6aSBarry Smith           }
293880c6e6aSBarry Smith         }
294880c6e6aSBarry Smith       } else {
295880c6e6aSBarry Smith         if (is == ADD_VALUES) {
2968ab52850SBarry Smith           for (ii=0; ii<bs; ii++,value+=bs) {
297880c6e6aSBarry Smith             for (jj=0; jj<bs; jj++) {
298880c6e6aSBarry Smith               bap[jj] += value[jj];
299880c6e6aSBarry Smith             }
300880c6e6aSBarry Smith             bap += bs;
301880c6e6aSBarry Smith           }
302880c6e6aSBarry Smith         } else {
3038ab52850SBarry Smith           for (ii=0; ii<bs; ii++,value+=bs) {
304880c6e6aSBarry Smith             for (jj=0; jj<bs; jj++) {
305880c6e6aSBarry Smith               bap[jj]  = value[jj];
306880c6e6aSBarry Smith             }
307880c6e6aSBarry Smith             bap += bs;
308880c6e6aSBarry Smith           }
309880c6e6aSBarry Smith         }
310880c6e6aSBarry Smith       }
311880c6e6aSBarry Smith       goto noinsert2;
312880c6e6aSBarry Smith     }
313880c6e6aSBarry Smith   }
314880c6e6aSBarry Smith   if (nonew == 1) goto noinsert2;
315880c6e6aSBarry Smith   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
316880c6e6aSBarry Smith   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
317880c6e6aSBarry Smith   N = nrow++ - 1; high++;
318880c6e6aSBarry Smith   /* shift up all the later entries in this row */
319580bdb30SBarry Smith   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
320580bdb30SBarry Smith   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
321880c6e6aSBarry Smith   rp[i] = col;
322880c6e6aSBarry Smith   bap   = ap +  bs2*i;
323880c6e6aSBarry Smith   if (roworiented) {
3248ab52850SBarry Smith     for (ii=0; ii<bs; ii++) {
325880c6e6aSBarry Smith       for (jj=ii; jj<bs2; jj+=bs) {
326880c6e6aSBarry Smith         bap[jj] = *value++;
327880c6e6aSBarry Smith       }
328880c6e6aSBarry Smith     }
329880c6e6aSBarry Smith   } else {
3308ab52850SBarry Smith     for (ii=0; ii<bs; ii++) {
331880c6e6aSBarry Smith       for (jj=0; jj<bs; jj++) {
332880c6e6aSBarry Smith         *bap++ = *value++;
333880c6e6aSBarry Smith       }
334880c6e6aSBarry Smith     }
335880c6e6aSBarry Smith   }
336880c6e6aSBarry Smith   noinsert2:;
337880c6e6aSBarry Smith   ailen[row] = nrow;
338880c6e6aSBarry Smith   PetscFunctionReturn(0);
339880c6e6aSBarry Smith }
340880c6e6aSBarry Smith 
3418ab52850SBarry Smith /*
3428ab52850SBarry Smith     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
3438ab52850SBarry Smith     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
3448ab52850SBarry Smith */
34597e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
346ab26458aSBarry Smith {
347ab26458aSBarry Smith   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
348dd6ea824SBarry Smith   const PetscScalar *value;
349f15d580aSBarry Smith   MatScalar         *barray     = baij->barray;
350ace3abfcSBarry Smith   PetscBool         roworiented = baij->roworiented;
351dfbe8321SBarry Smith   PetscErrorCode    ierr;
352899cda47SBarry Smith   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
353899cda47SBarry Smith   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
354d0f46423SBarry Smith   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
355ab26458aSBarry Smith 
356b16ae2b1SBarry Smith   PetscFunctionBegin;
35730793edcSSatish Balay   if (!barray) {
358785e854fSJed Brown     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
35982502324SSatish Balay     baij->barray = barray;
36030793edcSSatish Balay   }
36130793edcSSatish Balay 
36226fbe8dcSKarl Rupp   if (roworiented) stepval = (n-1)*bs;
36326fbe8dcSKarl Rupp   else stepval = (m-1)*bs;
36426fbe8dcSKarl Rupp 
365ab26458aSBarry Smith   for (i=0; i<m; i++) {
3665ef9f2a5SBarry Smith     if (im[i] < 0) continue;
367*cf9c20a2SJed Brown     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
368ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
369ab26458aSBarry Smith       row = im[i] - rstart;
370ab26458aSBarry Smith       for (j=0; j<n; j++) {
37115b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
37215b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
373f15d580aSBarry Smith           barray = (MatScalar*)v + i*bs2;
37415b57d14SSatish Balay         } else if ((!roworiented) && (m == 1)) {
375f15d580aSBarry Smith           barray = (MatScalar*)v + j*bs2;
37615b57d14SSatish Balay         } else { /* Here a copy is required */
377ab26458aSBarry Smith           if (roworiented) {
37853ef36baSBarry Smith             value = v + (i*(stepval+bs) + j)*bs;
379ab26458aSBarry Smith           } else {
38053ef36baSBarry Smith             value = v + (j*(stepval+bs) + i)*bs;
381abef11f7SSatish Balay           }
38253ef36baSBarry Smith           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
38326fbe8dcSKarl Rupp             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
38453ef36baSBarry Smith             barray += bs;
38547513183SBarry Smith           }
38630793edcSSatish Balay           barray -= bs2;
38715b57d14SSatish Balay         }
388abef11f7SSatish Balay 
389abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend) {
390abef11f7SSatish Balay           col  = in[j] - cstart;
3918ab52850SBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
39226fbe8dcSKarl Rupp         } else if (in[j] < 0) continue;
393*cf9c20a2SJed Brown         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) {
39476bd3646SJed Brown           SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
39576bd3646SJed Brown         } else {
396ab26458aSBarry Smith           if (mat->was_assembled) {
397ab26458aSBarry Smith             if (!baij->colmap) {
398ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
399ab26458aSBarry Smith             }
400a5eb4965SSatish Balay 
4012515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
402aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
403b24ad042SBarry Smith             { PetscInt data;
4040f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
405e32f2f54SBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
406fa46199cSSatish Balay             }
40748e59246SSatish Balay #else
408e32f2f54SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
409a5eb4965SSatish Balay #endif
41048e59246SSatish Balay #endif
411aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
4120f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
413fa46199cSSatish Balay             col  = (col - 1)/bs;
41448e59246SSatish Balay #else
415a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
41648e59246SSatish Balay #endif
4170e9bae81SBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
418ab9863d7SBarry Smith               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
419ab26458aSBarry Smith               col  =  in[j];
420bb003d0fSBarry Smith             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
421db4deed7SKarl Rupp           } else col = in[j];
4228ab52850SBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
423ab26458aSBarry Smith         }
424ab26458aSBarry Smith       }
425d64ed03dSBarry Smith     } else {
426bb003d0fSBarry Smith       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
427ab26458aSBarry Smith       if (!baij->donotstash) {
428ff2fd236SBarry Smith         if (roworiented) {
4296fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
430ff2fd236SBarry Smith         } else {
4316fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
432ff2fd236SBarry Smith         }
433abef11f7SSatish Balay       }
434ab26458aSBarry Smith     }
435ab26458aSBarry Smith   }
4363a40ed3dSBarry Smith   PetscFunctionReturn(0);
437ab26458aSBarry Smith }
4386fa18ffdSBarry Smith 
/*
   HASH(size,key,tmp) - Multiplicative hash of key into the range [0,size).
   key is multiplied by HASH_KEY, the fractional part of the product is taken
   (the product minus its truncation to PetscInt), scaled by size and truncated
   to PetscInt.  tmp is a caller-supplied scratch variable that the macro
   assigns (the callers in this file declare it as PetscReal).
   The two commented-out definitions below are fmod()-based variants of the
   same formula.
*/
4390bdbc534SSatish Balay #define HASH_KEY 0.6180339887
440b24ad042SBarry Smith #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
441b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
442b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
44397e5c40aSBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
4440bdbc534SSatish Balay {
4450bdbc534SSatish Balay   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
446ace3abfcSBarry Smith   PetscBool      roworiented = baij->roworiented;
447dfbe8321SBarry Smith   PetscErrorCode ierr;
448b24ad042SBarry Smith   PetscInt       i,j,row,col;
449d0f46423SBarry Smith   PetscInt       rstart_orig=mat->rmap->rstart;
450d0f46423SBarry Smith   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
451d0f46423SBarry Smith   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
452329f5518SBarry Smith   PetscReal      tmp;
4533eda8832SBarry Smith   MatScalar      **HD = baij->hd,value;
454b24ad042SBarry Smith   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4550bdbc534SSatish Balay 
4560bdbc534SSatish Balay   PetscFunctionBegin;
4570bdbc534SSatish Balay   for (i=0; i<m; i++) {
45876bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) {
459e32f2f54SBarry Smith       if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
460e32f2f54SBarry Smith       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
46176bd3646SJed Brown     }
4620bdbc534SSatish Balay     row = im[i];
463c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
4640bdbc534SSatish Balay       for (j=0; j<n; j++) {
4650bdbc534SSatish Balay         col = in[j];
466db4deed7SKarl Rupp         if (roworiented) value = v[i*n+j];
467db4deed7SKarl Rupp         else             value = v[i+j*m];
468b24ad042SBarry Smith         /* Look up PetscInto the Hash Table */
469c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
470c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4710bdbc534SSatish Balay 
472c2760754SSatish Balay 
473c2760754SSatish Balay         idx = h1;
47476bd3646SJed Brown         if (PetscDefined(USE_DEBUG)) {
475187ce0cbSSatish Balay           insert_ct++;
476187ce0cbSSatish Balay           total_ct++;
477187ce0cbSSatish Balay           if (HT[idx] != key) {
478187ce0cbSSatish Balay             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
479187ce0cbSSatish Balay             if (idx == size) {
480187ce0cbSSatish Balay               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
481f23aa3ddSBarry Smith               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
482187ce0cbSSatish Balay             }
483187ce0cbSSatish Balay           }
48476bd3646SJed Brown         } else if (HT[idx] != key) {
485c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
486c2760754SSatish Balay           if (idx == size) {
487c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
488f23aa3ddSBarry Smith             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
489c2760754SSatish Balay           }
490c2760754SSatish Balay         }
491c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
492c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
493c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
4940bdbc534SSatish Balay       }
49526fbe8dcSKarl Rupp     } else if (!baij->donotstash) {
496ff2fd236SBarry Smith       if (roworiented) {
497b400d20cSBarry Smith         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
498ff2fd236SBarry Smith       } else {
499b400d20cSBarry Smith         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
5000bdbc534SSatish Balay       }
5010bdbc534SSatish Balay     }
5020bdbc534SSatish Balay   }
50376bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
504abf3b562SBarry Smith     baij->ht_total_ct  += total_ct;
505abf3b562SBarry Smith     baij->ht_insert_ct += insert_ct;
50676bd3646SJed Brown   }
5070bdbc534SSatish Balay   PetscFunctionReturn(0);
5080bdbc534SSatish Balay }
5090bdbc534SSatish Balay 
51097e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
5110bdbc534SSatish Balay {
5120bdbc534SSatish Balay   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
513ace3abfcSBarry Smith   PetscBool         roworiented = baij->roworiented;
514dfbe8321SBarry Smith   PetscErrorCode    ierr;
515b24ad042SBarry Smith   PetscInt          i,j,ii,jj,row,col;
516899cda47SBarry Smith   PetscInt          rstart=baij->rstartbs;
517d0f46423SBarry Smith   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
518b24ad042SBarry Smith   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
519329f5518SBarry Smith   PetscReal         tmp;
5203eda8832SBarry Smith   MatScalar         **HD = baij->hd,*baij_a;
521dd6ea824SBarry Smith   const PetscScalar *v_t,*value;
522b24ad042SBarry Smith   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5230bdbc534SSatish Balay 
524d0a41580SSatish Balay   PetscFunctionBegin;
52526fbe8dcSKarl Rupp   if (roworiented) stepval = (n-1)*bs;
52626fbe8dcSKarl Rupp   else stepval = (m-1)*bs;
52726fbe8dcSKarl Rupp 
5280bdbc534SSatish Balay   for (i=0; i<m; i++) {
52976bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) {
530e32f2f54SBarry Smith       if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
531e32f2f54SBarry Smith       if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
53276bd3646SJed Brown     }
5330bdbc534SSatish Balay     row = im[i];
534ab715e2cSSatish Balay     v_t = v + i*nbs2;
535c2760754SSatish Balay     if (row >= rstart && row < rend) {
5360bdbc534SSatish Balay       for (j=0; j<n; j++) {
5370bdbc534SSatish Balay         col = in[j];
5380bdbc534SSatish Balay 
5390bdbc534SSatish Balay         /* Look up into the Hash Table */
540c2760754SSatish Balay         key = row*Nbs+col+1;
541c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5420bdbc534SSatish Balay 
543c2760754SSatish Balay         idx = h1;
54476bd3646SJed Brown         if (PetscDefined(USE_DEBUG)) {
545187ce0cbSSatish Balay           total_ct++;
546187ce0cbSSatish Balay           insert_ct++;
547187ce0cbSSatish Balay           if (HT[idx] != key) {
548187ce0cbSSatish Balay             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
549187ce0cbSSatish Balay             if (idx == size) {
550187ce0cbSSatish Balay               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
551f23aa3ddSBarry Smith               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
552187ce0cbSSatish Balay             }
553187ce0cbSSatish Balay           }
55476bd3646SJed Brown         } else if (HT[idx] != key) {
555c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
556c2760754SSatish Balay           if (idx == size) {
557c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
558f23aa3ddSBarry Smith             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
559c2760754SSatish Balay           }
560c2760754SSatish Balay         }
561c2760754SSatish Balay         baij_a = HD[idx];
5620bdbc534SSatish Balay         if (roworiented) {
563c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
564187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
565187ce0cbSSatish Balay           value = v_t;
566187ce0cbSSatish Balay           v_t  += bs;
567fef45726SSatish Balay           if (addv == ADD_VALUES) {
568c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
569c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
570fef45726SSatish Balay                 baij_a[jj] += *value++;
571b4cc0f5aSSatish Balay               }
572b4cc0f5aSSatish Balay             }
573fef45726SSatish Balay           } else {
574c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
575c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
576fef45726SSatish Balay                 baij_a[jj] = *value++;
577fef45726SSatish Balay               }
578fef45726SSatish Balay             }
579fef45726SSatish Balay           }
5800bdbc534SSatish Balay         } else {
5810bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
582fef45726SSatish Balay           if (addv == ADD_VALUES) {
583b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
5840bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
585fef45726SSatish Balay                 baij_a[jj] += *value++;
586fef45726SSatish Balay               }
587fef45726SSatish Balay             }
588fef45726SSatish Balay           } else {
589fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
590fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
591fef45726SSatish Balay                 baij_a[jj] = *value++;
592fef45726SSatish Balay               }
593b4cc0f5aSSatish Balay             }
5940bdbc534SSatish Balay           }
5950bdbc534SSatish Balay         }
5960bdbc534SSatish Balay       }
5970bdbc534SSatish Balay     } else {
5980bdbc534SSatish Balay       if (!baij->donotstash) {
5990bdbc534SSatish Balay         if (roworiented) {
6008798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6010bdbc534SSatish Balay         } else {
6028798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6030bdbc534SSatish Balay         }
6040bdbc534SSatish Balay       }
6050bdbc534SSatish Balay     }
6060bdbc534SSatish Balay   }
60776bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
608abf3b562SBarry Smith     baij->ht_total_ct  += total_ct;
609abf3b562SBarry Smith     baij->ht_insert_ct += insert_ct;
61076bd3646SJed Brown   }
6110bdbc534SSatish Balay   PetscFunctionReturn(0);
6120bdbc534SSatish Balay }
613133cdb44SSatish Balay 
614b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
615d6de1c52SSatish Balay {
616d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
6176849ba73SBarry Smith   PetscErrorCode ierr;
618d0f46423SBarry Smith   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
619d0f46423SBarry Smith   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
620d6de1c52SSatish Balay 
621133cdb44SSatish Balay   PetscFunctionBegin;
622d6de1c52SSatish Balay   for (i=0; i<m; i++) {
623e32f2f54SBarry Smith     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
624e32f2f54SBarry Smith     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
625d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
626d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
627d6de1c52SSatish Balay       for (j=0; j<n; j++) {
628e32f2f54SBarry Smith         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
629e32f2f54SBarry Smith         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
630d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend) {
631d6de1c52SSatish Balay           col  = idxn[j] - bscstart;
63298dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633d64ed03dSBarry Smith         } else {
634905e6a2fSBarry Smith           if (!baij->colmap) {
635ab9863d7SBarry Smith             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
636905e6a2fSBarry Smith           }
637aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
6380f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
639fa46199cSSatish Balay           data--;
64048e59246SSatish Balay #else
64148e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
64248e59246SSatish Balay #endif
64348e59246SSatish Balay           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
644d9d09a02SSatish Balay           else {
64548e59246SSatish Balay             col  = data + idxn[j]%bs;
64698dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647d6de1c52SSatish Balay           }
648d6de1c52SSatish Balay         }
649d6de1c52SSatish Balay       }
650f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
651d6de1c52SSatish Balay   }
6523a40ed3dSBarry Smith   PetscFunctionReturn(0);
653d6de1c52SSatish Balay }
654d6de1c52SSatish Balay 
655dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
656d6de1c52SSatish Balay {
657d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
658d6de1c52SSatish Balay   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
659dfbe8321SBarry Smith   PetscErrorCode ierr;
660d0f46423SBarry Smith   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
661329f5518SBarry Smith   PetscReal      sum = 0.0;
6623eda8832SBarry Smith   MatScalar      *v;
663d6de1c52SSatish Balay 
664d64ed03dSBarry Smith   PetscFunctionBegin;
665d6de1c52SSatish Balay   if (baij->size == 1) {
666064f8208SBarry Smith     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
667d6de1c52SSatish Balay   } else {
668d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
669d6de1c52SSatish Balay       v  = amat->a;
6708a62d963SHong Zhang       nz = amat->nz*bs2;
6718a62d963SHong Zhang       for (i=0; i<nz; i++) {
672329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
673d6de1c52SSatish Balay       }
674d6de1c52SSatish Balay       v  = bmat->a;
6758a62d963SHong Zhang       nz = bmat->nz*bs2;
6768a62d963SHong Zhang       for (i=0; i<nz; i++) {
677329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
678d6de1c52SSatish Balay       }
679b2566f29SBarry Smith       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
6808f1a2a5eSBarry Smith       *nrm = PetscSqrtReal(*nrm);
6818a62d963SHong Zhang     } else if (type == NORM_1) { /* max column sum */
6828a62d963SHong Zhang       PetscReal *tmp,*tmp2;
683899cda47SBarry Smith       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
6848f8f2f0dSBarry Smith       ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
685857a15f1SBarry Smith       ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
6868a62d963SHong Zhang       v    = amat->a; jj = amat->j;
6878a62d963SHong Zhang       for (i=0; i<amat->nz; i++) {
6888a62d963SHong Zhang         for (j=0; j<bs; j++) {
6898a62d963SHong Zhang           col = bs*(cstart + *jj) + j; /* column index */
6908a62d963SHong Zhang           for (row=0; row<bs; row++) {
6918a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v);  v++;
6928a62d963SHong Zhang           }
6938a62d963SHong Zhang         }
6948a62d963SHong Zhang         jj++;
6958a62d963SHong Zhang       }
6968a62d963SHong Zhang       v = bmat->a; jj = bmat->j;
6978a62d963SHong Zhang       for (i=0; i<bmat->nz; i++) {
6988a62d963SHong Zhang         for (j=0; j<bs; j++) {
6998a62d963SHong Zhang           col = bs*garray[*jj] + j;
7008a62d963SHong Zhang           for (row=0; row<bs; row++) {
7018a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v); v++;
7028a62d963SHong Zhang           }
7038a62d963SHong Zhang         }
7048a62d963SHong Zhang         jj++;
7058a62d963SHong Zhang       }
706b2566f29SBarry Smith       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
7078a62d963SHong Zhang       *nrm = 0.0;
708d0f46423SBarry Smith       for (j=0; j<mat->cmap->N; j++) {
7098a62d963SHong Zhang         if (tmp2[j] > *nrm) *nrm = tmp2[j];
7108a62d963SHong Zhang       }
711857a15f1SBarry Smith       ierr = PetscFree(tmp);CHKERRQ(ierr);
712857a15f1SBarry Smith       ierr = PetscFree(tmp2);CHKERRQ(ierr);
7138a62d963SHong Zhang     } else if (type == NORM_INFINITY) { /* max row sum */
714577dd1f9SKris Buschelman       PetscReal *sums;
715785e854fSJed Brown       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
7168a62d963SHong Zhang       sum  = 0.0;
7178a62d963SHong Zhang       for (j=0; j<amat->mbs; j++) {
7188a62d963SHong Zhang         for (row=0; row<bs; row++) sums[row] = 0.0;
7198a62d963SHong Zhang         v  = amat->a + bs2*amat->i[j];
7208a62d963SHong Zhang         nz = amat->i[j+1]-amat->i[j];
7218a62d963SHong Zhang         for (i=0; i<nz; i++) {
7228a62d963SHong Zhang           for (col=0; col<bs; col++) {
7238a62d963SHong Zhang             for (row=0; row<bs; row++) {
7248a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
7258a62d963SHong Zhang             }
7268a62d963SHong Zhang           }
7278a62d963SHong Zhang         }
7288a62d963SHong Zhang         v  = bmat->a + bs2*bmat->i[j];
7298a62d963SHong Zhang         nz = bmat->i[j+1]-bmat->i[j];
7308a62d963SHong Zhang         for (i=0; i<nz; i++) {
7318a62d963SHong Zhang           for (col=0; col<bs; col++) {
7328a62d963SHong Zhang             for (row=0; row<bs; row++) {
7338a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
7348a62d963SHong Zhang             }
7358a62d963SHong Zhang           }
7368a62d963SHong Zhang         }
7378a62d963SHong Zhang         for (row=0; row<bs; row++) {
7388a62d963SHong Zhang           if (sums[row] > sum) sum = sums[row];
7398a62d963SHong Zhang         }
7408a62d963SHong Zhang       }
741b2566f29SBarry Smith       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742577dd1f9SKris Buschelman       ierr = PetscFree(sums);CHKERRQ(ierr);
743ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
744d64ed03dSBarry Smith   }
7453a40ed3dSBarry Smith   PetscFunctionReturn(0);
746d6de1c52SSatish Balay }
74757b952d6SSatish Balay 
748fef45726SSatish Balay /*
749fef45726SSatish Balay   Creates the hash table, and sets the table
750fef45726SSatish Balay   This table is created only once.
751fef45726SSatish Balay   If new entried need to be added to the matrix
752fef45726SSatish Balay   then the hash table has to be destroyed and
753fef45726SSatish Balay   recreated.
754fef45726SSatish Balay */
755dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
756596b8d2eSBarry Smith {
757596b8d2eSBarry Smith   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
758596b8d2eSBarry Smith   Mat            A     = baij->A,B=baij->B;
759596b8d2eSBarry Smith   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
760b24ad042SBarry Smith   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
7616849ba73SBarry Smith   PetscErrorCode ierr;
762fca92195SBarry Smith   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
763899cda47SBarry Smith   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
764b24ad042SBarry Smith   PetscInt       *HT,key;
7653eda8832SBarry Smith   MatScalar      **HD;
766329f5518SBarry Smith   PetscReal      tmp;
7676cf91177SBarry Smith #if defined(PETSC_USE_INFO)
768b24ad042SBarry Smith   PetscInt ct=0,max=0;
7694a15367fSSatish Balay #endif
770fef45726SSatish Balay 
771d64ed03dSBarry Smith   PetscFunctionBegin;
772fca92195SBarry Smith   if (baij->ht) PetscFunctionReturn(0);
773fef45726SSatish Balay 
774fca92195SBarry Smith   baij->ht_size = (PetscInt)(factor*nz);
775fca92195SBarry Smith   ht_size       = baij->ht_size;
7760bdbc534SSatish Balay 
777fef45726SSatish Balay   /* Allocate Memory for Hash Table */
7781795a4d1SJed Brown   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
779b9e4cc15SSatish Balay   HD   = baij->hd;
780a07cd24cSSatish Balay   HT   = baij->ht;
781b9e4cc15SSatish Balay 
782596b8d2eSBarry Smith   /* Loop Over A */
7830bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
784596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
7850bdbc534SSatish Balay       row = i+rstart;
7860bdbc534SSatish Balay       col = aj[j]+cstart;
787596b8d2eSBarry Smith 
788187ce0cbSSatish Balay       key = row*Nbs + col + 1;
789fca92195SBarry Smith       h1  = HASH(ht_size,key,tmp);
790fca92195SBarry Smith       for (k=0; k<ht_size; k++) {
791fca92195SBarry Smith         if (!HT[(h1+k)%ht_size]) {
792fca92195SBarry Smith           HT[(h1+k)%ht_size] = key;
793fca92195SBarry Smith           HD[(h1+k)%ht_size] = a->a + j*bs2;
794596b8d2eSBarry Smith           break;
7956cf91177SBarry Smith #if defined(PETSC_USE_INFO)
796187ce0cbSSatish Balay         } else {
797187ce0cbSSatish Balay           ct++;
798187ce0cbSSatish Balay #endif
799596b8d2eSBarry Smith         }
800187ce0cbSSatish Balay       }
8016cf91177SBarry Smith #if defined(PETSC_USE_INFO)
802187ce0cbSSatish Balay       if (k> max) max = k;
803187ce0cbSSatish Balay #endif
804596b8d2eSBarry Smith     }
805596b8d2eSBarry Smith   }
806596b8d2eSBarry Smith   /* Loop Over B */
8070bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
808596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
8090bdbc534SSatish Balay       row = i+rstart;
8100bdbc534SSatish Balay       col = garray[bj[j]];
811187ce0cbSSatish Balay       key = row*Nbs + col + 1;
812fca92195SBarry Smith       h1  = HASH(ht_size,key,tmp);
813fca92195SBarry Smith       for (k=0; k<ht_size; k++) {
814fca92195SBarry Smith         if (!HT[(h1+k)%ht_size]) {
815fca92195SBarry Smith           HT[(h1+k)%ht_size] = key;
816fca92195SBarry Smith           HD[(h1+k)%ht_size] = b->a + j*bs2;
817596b8d2eSBarry Smith           break;
8186cf91177SBarry Smith #if defined(PETSC_USE_INFO)
819187ce0cbSSatish Balay         } else {
820187ce0cbSSatish Balay           ct++;
821187ce0cbSSatish Balay #endif
822596b8d2eSBarry Smith         }
823187ce0cbSSatish Balay       }
8246cf91177SBarry Smith #if defined(PETSC_USE_INFO)
825187ce0cbSSatish Balay       if (k> max) max = k;
826187ce0cbSSatish Balay #endif
827596b8d2eSBarry Smith     }
828596b8d2eSBarry Smith   }
829596b8d2eSBarry Smith 
830596b8d2eSBarry Smith   /* Print Summary */
8316cf91177SBarry Smith #if defined(PETSC_USE_INFO)
832fca92195SBarry Smith   for (i=0,j=0; i<ht_size; i++) {
83326fbe8dcSKarl Rupp     if (HT[i]) j++;
834c38d4ed2SBarry Smith   }
8351e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
836187ce0cbSSatish Balay #endif
8373a40ed3dSBarry Smith   PetscFunctionReturn(0);
838596b8d2eSBarry Smith }
83957b952d6SSatish Balay 
840dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
841bbb85fb3SSatish Balay {
842bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
843dfbe8321SBarry Smith   PetscErrorCode ierr;
844b24ad042SBarry Smith   PetscInt       nstash,reallocs;
845bbb85fb3SSatish Balay 
846bbb85fb3SSatish Balay   PetscFunctionBegin;
84726fbe8dcSKarl Rupp   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
848bbb85fb3SSatish Balay 
849d0f46423SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
8501e2582c4SBarry Smith   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
8518798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
8521e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
85346680499SSatish Balay   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
8541e2582c4SBarry Smith   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
855bbb85fb3SSatish Balay   PetscFunctionReturn(0);
856bbb85fb3SSatish Balay }
857bbb85fb3SSatish Balay 
858dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
859bbb85fb3SSatish Balay {
860bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
86191c97fd4SSatish Balay   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
8626849ba73SBarry Smith   PetscErrorCode ierr;
863b24ad042SBarry Smith   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
864e44c0bd4SBarry Smith   PetscInt       *row,*col;
865ace3abfcSBarry Smith   PetscBool      r1,r2,r3,other_disassembled;
8663eda8832SBarry Smith   MatScalar      *val;
867b24ad042SBarry Smith   PetscMPIInt    n;
868bbb85fb3SSatish Balay 
869bbb85fb3SSatish Balay   PetscFunctionBegin;
8705fd66863SKarl Rupp   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
8714cb17eb5SBarry Smith   if (!baij->donotstash && !mat->nooffprocentries) {
872a2d1c673SSatish Balay     while (1) {
8738798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
874a2d1c673SSatish Balay       if (!flg) break;
875a2d1c673SSatish Balay 
876bbb85fb3SSatish Balay       for (i=0; i<n;) {
877bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
87826fbe8dcSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
87926fbe8dcSKarl Rupp           if (row[j] != rstart) break;
88026fbe8dcSKarl Rupp         }
881bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
882bbb85fb3SSatish Balay         else       ncols = n-i;
883bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
8844b4eb8d3SJed Brown         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
885bbb85fb3SSatish Balay         i    = j;
886bbb85fb3SSatish Balay       }
887bbb85fb3SSatish Balay     }
8888798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
889a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
890a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
891a2d1c673SSatish Balay        restore the original flags */
892a2d1c673SSatish Balay     r1 = baij->roworiented;
893a2d1c673SSatish Balay     r2 = a->roworiented;
89491c97fd4SSatish Balay     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
89526fbe8dcSKarl Rupp 
8967c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
8977c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
89826fbe8dcSKarl Rupp 
89991c97fd4SSatish Balay     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
900a2d1c673SSatish Balay     while (1) {
9018798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
902a2d1c673SSatish Balay       if (!flg) break;
903a2d1c673SSatish Balay 
904a2d1c673SSatish Balay       for (i=0; i<n;) {
905a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
90626fbe8dcSKarl Rupp         for (j=i,rstart=row[j]; j<n; j++) {
90726fbe8dcSKarl Rupp           if (row[j] != rstart) break;
90826fbe8dcSKarl Rupp         }
909a2d1c673SSatish Balay         if (j < n) ncols = j-i;
910a2d1c673SSatish Balay         else       ncols = n-i;
9114b4eb8d3SJed Brown         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
912a2d1c673SSatish Balay         i    = j;
913a2d1c673SSatish Balay       }
914a2d1c673SSatish Balay     }
9158798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
91626fbe8dcSKarl Rupp 
917a2d1c673SSatish Balay     baij->roworiented = r1;
918a2d1c673SSatish Balay     a->roworiented    = r2;
91926fbe8dcSKarl Rupp 
92091c97fd4SSatish Balay     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
921bbb85fb3SSatish Balay   }
922bbb85fb3SSatish Balay 
923bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
924bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
925bbb85fb3SSatish Balay 
926bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
927bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
928bbb85fb3SSatish Balay   /*
929bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
930bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
931bbb85fb3SSatish Balay   */
932bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
933b2566f29SBarry Smith     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
934bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
935ab9863d7SBarry Smith       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
936bbb85fb3SSatish Balay     }
937bbb85fb3SSatish Balay   }
938bbb85fb3SSatish Balay 
939bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
940bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
941bbb85fb3SSatish Balay   }
942bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
943bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
944bbb85fb3SSatish Balay 
9456cf91177SBarry Smith #if defined(PETSC_USE_INFO)
946bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
947abf3b562SBarry Smith     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
94826fbe8dcSKarl Rupp 
949bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
950bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
951bbb85fb3SSatish Balay   }
952bbb85fb3SSatish Balay #endif
953bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
954bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
95526fbe8dcSKarl Rupp 
956bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
957bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
958bbb85fb3SSatish Balay   }
959bbb85fb3SSatish Balay 
960fca92195SBarry Smith   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
96126fbe8dcSKarl Rupp 
962606d414cSSatish Balay   baij->rowvalues = 0;
9634f9cfa9eSBarry Smith 
9644f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
9654f9cfa9eSBarry Smith   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
966e56f5c9eSBarry Smith     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
967b2566f29SBarry Smith     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
968e56f5c9eSBarry Smith   }
969bbb85fb3SSatish Balay   PetscFunctionReturn(0);
970bbb85fb3SSatish Balay }
97157b952d6SSatish Balay 
9727da1fb6eSBarry Smith extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
9739804daf3SBarry Smith #include <petscdraw.h>
9746849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
97557b952d6SSatish Balay {
97657b952d6SSatish Balay   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
977dfbe8321SBarry Smith   PetscErrorCode    ierr;
9787da1fb6eSBarry Smith   PetscMPIInt       rank = baij->rank;
979d0f46423SBarry Smith   PetscInt          bs   = mat->rmap->bs;
980ace3abfcSBarry Smith   PetscBool         iascii,isdraw;
981b0a32e0cSBarry Smith   PetscViewer       sviewer;
982f3ef73ceSBarry Smith   PetscViewerFormat format;
98357b952d6SSatish Balay 
984d64ed03dSBarry Smith   PetscFunctionBegin;
985251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
986251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
98732077d6dSBarry Smith   if (iascii) {
988b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
989456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
9904e220ebcSLois Curfman McInnes       MatInfo info;
991ce94432eSBarry Smith       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
992d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
9931575c14dSBarry Smith       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
994b1e9c6f1SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
995b1e9c6f1SBarry Smith                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
996d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
997e6dd01d4SJed Brown       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
998d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
999e6dd01d4SJed Brown       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1000b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
10011575c14dSBarry Smith       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
100207d81ca4SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
100357b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
10043a40ed3dSBarry Smith       PetscFunctionReturn(0);
1005fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
100677431f27SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
10073a40ed3dSBarry Smith       PetscFunctionReturn(0);
100804929863SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
100904929863SHong Zhang       PetscFunctionReturn(0);
101057b952d6SSatish Balay     }
101157b952d6SSatish Balay   }
101257b952d6SSatish Balay 
10130f5bd95cSBarry Smith   if (isdraw) {
1014b0a32e0cSBarry Smith     PetscDraw draw;
1015ace3abfcSBarry Smith     PetscBool isnull;
1016b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
101745f3bb6eSLisandro Dalcin     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
101845f3bb6eSLisandro Dalcin     if (isnull) PetscFunctionReturn(0);
101957b952d6SSatish Balay   }
102057b952d6SSatish Balay 
10217da1fb6eSBarry Smith   {
102257b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
102357b952d6SSatish Balay     Mat         A;
102457b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
1025d0f46423SBarry Smith     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
10263eda8832SBarry Smith     MatScalar   *a;
10273e219373SBarry Smith     const char  *matname;
102857b952d6SSatish Balay 
1029f204ca49SKris Buschelman     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1030f204ca49SKris Buschelman     /* Perhaps this should be the type of mat? */
1031ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
103257b952d6SSatish Balay     if (!rank) {
1033f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1034d64ed03dSBarry Smith     } else {
1035f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
103657b952d6SSatish Balay     }
1037f204ca49SKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
10380298fd71SBarry Smith     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
10392b82e772SSatish Balay     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
10403bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
104157b952d6SSatish Balay 
104257b952d6SSatish Balay     /* copy over the A part */
104357b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->A->data;
104457b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1045785e854fSJed Brown     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
104657b952d6SSatish Balay 
104757b952d6SSatish Balay     for (i=0; i<mbs; i++) {
1048899cda47SBarry Smith       rvals[0] = bs*(baij->rstartbs + i);
104926fbe8dcSKarl Rupp       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
105057b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1051899cda47SBarry Smith         col = (baij->cstartbs+aj[j])*bs;
105257b952d6SSatish Balay         for (k=0; k<bs; k++) {
105397e5c40aSBarry Smith           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1054cee3aa6bSSatish Balay           col++; a += bs;
105557b952d6SSatish Balay         }
105657b952d6SSatish Balay       }
105757b952d6SSatish Balay     }
105857b952d6SSatish Balay     /* copy over the B part */
105957b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
106057b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
106157b952d6SSatish Balay     for (i=0; i<mbs; i++) {
1062899cda47SBarry Smith       rvals[0] = bs*(baij->rstartbs + i);
106326fbe8dcSKarl Rupp       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
106457b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
106557b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
106657b952d6SSatish Balay         for (k=0; k<bs; k++) {
106797e5c40aSBarry Smith           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1068cee3aa6bSSatish Balay           col++; a += bs;
106957b952d6SSatish Balay         }
107057b952d6SSatish Balay       }
107157b952d6SSatish Balay     }
1072606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
10736d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10746d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
107555843e3eSBarry Smith     /*
107655843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1077b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
107855843e3eSBarry Smith     */
10793f08860eSBarry Smith     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1080ade3a672SBarry Smith     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
10813e219373SBarry Smith     if (!rank) {
1082ade3a672SBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
10837da1fb6eSBarry Smith       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
108457b952d6SSatish Balay     }
10853f08860eSBarry Smith     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
10861575c14dSBarry Smith     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
10876bf464f9SBarry Smith     ierr = MatDestroy(&A);CHKERRQ(ierr);
108857b952d6SSatish Balay   }
10893a40ed3dSBarry Smith   PetscFunctionReturn(0);
109057b952d6SSatish Balay }
109157b952d6SSatish Balay 
1092618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
1093b51a4376SLisandro Dalcin PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1094660746e0SBarry Smith {
1095b51a4376SLisandro Dalcin   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
1096b51a4376SLisandro Dalcin   Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
1097b51a4376SLisandro Dalcin   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
1098b51a4376SLisandro Dalcin   const PetscInt *garray = aij->garray;
1099b51a4376SLisandro Dalcin   PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
1100b51a4376SLisandro Dalcin   PetscInt       *rowlens,*colidxs;
1101b51a4376SLisandro Dalcin   PetscScalar    *matvals;
1102660746e0SBarry Smith   PetscErrorCode ierr;
1103660746e0SBarry Smith 
1104660746e0SBarry Smith   PetscFunctionBegin;
1105b51a4376SLisandro Dalcin   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1106b51a4376SLisandro Dalcin 
1107b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1108b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1109b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1110b51a4376SLisandro Dalcin   rs = mat->rmap->rstart;
1111b51a4376SLisandro Dalcin   cs = mat->cmap->rstart;
1112b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
1113b51a4376SLisandro Dalcin   nz = bs*bs*(A->nz + B->nz);
1114b51a4376SLisandro Dalcin 
1115b51a4376SLisandro Dalcin   /* write matrix header */
1116660746e0SBarry Smith   header[0] = MAT_FILE_CLASSID;
1117b51a4376SLisandro Dalcin   header[1] = M; header[2] = N; header[3] = nz;
1118ce94432eSBarry Smith   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1119b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1120660746e0SBarry Smith 
1121b51a4376SLisandro Dalcin   /* fill in and store row lengths */
1122b51a4376SLisandro Dalcin   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1123b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++)
1124b51a4376SLisandro Dalcin     for (j=0; j<bs; j++)
1125b51a4376SLisandro Dalcin       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
1126b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1127b51a4376SLisandro Dalcin   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1128660746e0SBarry Smith 
1129b51a4376SLisandro Dalcin   /* fill in and store column indices */
1130b51a4376SLisandro Dalcin   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1131b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++) {
1132b51a4376SLisandro Dalcin     for (k=0; k<bs; k++) {
1133b51a4376SLisandro Dalcin       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1134b51a4376SLisandro Dalcin         if (garray[B->j[jb]] > cs/bs) break;
1135b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1136b51a4376SLisandro Dalcin           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1137660746e0SBarry Smith       }
1138b51a4376SLisandro Dalcin       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1139b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1140b51a4376SLisandro Dalcin           colidxs[cnt++] = bs*A->j[ja] + l + cs;
1141b51a4376SLisandro Dalcin       for (; jb<B->i[i+1]; jb++)
1142b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1143b51a4376SLisandro Dalcin           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1144660746e0SBarry Smith     }
1145660746e0SBarry Smith   }
1146660746e0SBarry Smith   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1147b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
1148b51a4376SLisandro Dalcin   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1149660746e0SBarry Smith 
1150b51a4376SLisandro Dalcin   /* fill in and store nonzero values */
1151b51a4376SLisandro Dalcin   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1152b51a4376SLisandro Dalcin   for (cnt=0, i=0; i<A->mbs; i++) {
1153b51a4376SLisandro Dalcin     for (k=0; k<bs; k++) {
1154b51a4376SLisandro Dalcin       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1155b51a4376SLisandro Dalcin         if (garray[B->j[jb]] > cs/bs) break;
1156b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1157b51a4376SLisandro Dalcin           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1158660746e0SBarry Smith       }
1159b51a4376SLisandro Dalcin       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1160b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1161b51a4376SLisandro Dalcin           matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
1162b51a4376SLisandro Dalcin       for (; jb<B->i[i+1]; jb++)
1163b51a4376SLisandro Dalcin         for (l=0; l<bs; l++)
1164d21b9a37SPierre Jolivet           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1165660746e0SBarry Smith     }
1166b51a4376SLisandro Dalcin   }
1167b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
1168b51a4376SLisandro Dalcin   ierr = PetscFree(matvals);CHKERRQ(ierr);
1169660746e0SBarry Smith 
1170b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
1171b51a4376SLisandro Dalcin   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1172660746e0SBarry Smith   PetscFunctionReturn(0);
1173660746e0SBarry Smith }
1174660746e0SBarry Smith 
1175dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
117657b952d6SSatish Balay {
1177dfbe8321SBarry Smith   PetscErrorCode ierr;
1178ace3abfcSBarry Smith   PetscBool      iascii,isdraw,issocket,isbinary;
117957b952d6SSatish Balay 
1180d64ed03dSBarry Smith   PetscFunctionBegin;
1181251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1182251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1183251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1184251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1185660746e0SBarry Smith   if (iascii || isdraw || issocket) {
11867b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1187660746e0SBarry Smith   } else if (isbinary) {
1188660746e0SBarry Smith     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
118957b952d6SSatish Balay   }
11903a40ed3dSBarry Smith   PetscFunctionReturn(0);
119157b952d6SSatish Balay }
119257b952d6SSatish Balay 
1193dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
119479bdfe76SSatish Balay {
119579bdfe76SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1196dfbe8321SBarry Smith   PetscErrorCode ierr;
119779bdfe76SSatish Balay 
1198d64ed03dSBarry Smith   PetscFunctionBegin;
1199aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1200d0f46423SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
120179bdfe76SSatish Balay #endif
12028798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
12038798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
12046bf464f9SBarry Smith   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
12056bf464f9SBarry Smith   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1206aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
12076bc0bbbfSBarry Smith   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
120848e59246SSatish Balay #else
120905b42c5fSBarry Smith   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
121048e59246SSatish Balay #endif
121105b42c5fSBarry Smith   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
12126bf464f9SBarry Smith   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
12136bf464f9SBarry Smith   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1214fca92195SBarry Smith   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
121505b42c5fSBarry Smith   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1216fca92195SBarry Smith   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1217899cda47SBarry Smith   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1218bf0cc555SLisandro Dalcin   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1219901853e0SKris Buschelman 
1220dbd8c25aSHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1221bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1222bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1223bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1224bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1225bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1226bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1227bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1228bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
12297ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
12307ea3e4caSstefano_zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
12317ea3e4caSstefano_zampini #endif
1232c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
12333a40ed3dSBarry Smith   PetscFunctionReturn(0);
123479bdfe76SSatish Balay }
123579bdfe76SSatish Balay 
1236dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1237cee3aa6bSSatish Balay {
1238cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1239dfbe8321SBarry Smith   PetscErrorCode ierr;
1240b24ad042SBarry Smith   PetscInt       nt;
1241cee3aa6bSSatish Balay 
1242d64ed03dSBarry Smith   PetscFunctionBegin;
1243e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1244e7e72b3dSBarry Smith   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1245e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1246e7e72b3dSBarry Smith   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1247ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1248f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1249ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1250f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
12513a40ed3dSBarry Smith   PetscFunctionReturn(0);
1252cee3aa6bSSatish Balay }
1253cee3aa6bSSatish Balay 
1254dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1255cee3aa6bSSatish Balay {
1256cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1257dfbe8321SBarry Smith   PetscErrorCode ierr;
1258d64ed03dSBarry Smith 
1259d64ed03dSBarry Smith   PetscFunctionBegin;
1260ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1262ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1263f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
12643a40ed3dSBarry Smith   PetscFunctionReturn(0);
1265cee3aa6bSSatish Balay }
1266cee3aa6bSSatish Balay 
1267dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1268cee3aa6bSSatish Balay {
1269cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1270dfbe8321SBarry Smith   PetscErrorCode ierr;
1271cee3aa6bSSatish Balay 
1272d64ed03dSBarry Smith   PetscFunctionBegin;
1273cee3aa6bSSatish Balay   /* do nondiagonal part */
12747c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1275cee3aa6bSSatish Balay   /* do local part */
12767c922b88SBarry Smith   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1277e4a140f6SJunchao Zhang   /* add partial results together */
1278ca9f406cSSatish Balay   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1279ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
12803a40ed3dSBarry Smith   PetscFunctionReturn(0);
1281cee3aa6bSSatish Balay }
1282cee3aa6bSSatish Balay 
1283dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1284cee3aa6bSSatish Balay {
1285cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1286dfbe8321SBarry Smith   PetscErrorCode ierr;
1287cee3aa6bSSatish Balay 
1288d64ed03dSBarry Smith   PetscFunctionBegin;
1289cee3aa6bSSatish Balay   /* do nondiagonal part */
12907c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1291cee3aa6bSSatish Balay   /* do local part */
12927c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1293e4a140f6SJunchao Zhang   /* add partial results together */
1294e4a140f6SJunchao Zhang   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1295ca9f406cSSatish Balay   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
12963a40ed3dSBarry Smith   PetscFunctionReturn(0);
1297cee3aa6bSSatish Balay }
1298cee3aa6bSSatish Balay 
1299cee3aa6bSSatish Balay /*
1300cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1301cee3aa6bSSatish Balay    diagonal block
1302cee3aa6bSSatish Balay */
1303dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1304cee3aa6bSSatish Balay {
1305cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1306dfbe8321SBarry Smith   PetscErrorCode ierr;
1307d64ed03dSBarry Smith 
1308d64ed03dSBarry Smith   PetscFunctionBegin;
1309e32f2f54SBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
13103a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
13113a40ed3dSBarry Smith   PetscFunctionReturn(0);
1312cee3aa6bSSatish Balay }
1313cee3aa6bSSatish Balay 
1314f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1315cee3aa6bSSatish Balay {
1316cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1317dfbe8321SBarry Smith   PetscErrorCode ierr;
1318d64ed03dSBarry Smith 
1319d64ed03dSBarry Smith   PetscFunctionBegin;
1320f4df32b1SMatthew Knepley   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1321f4df32b1SMatthew Knepley   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
13223a40ed3dSBarry Smith   PetscFunctionReturn(0);
1323cee3aa6bSSatish Balay }
1324026e39d0SSatish Balay 
1325b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1326acdf5bf4SSatish Balay {
1327acdf5bf4SSatish Balay   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
132887828ca2SBarry Smith   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
13296849ba73SBarry Smith   PetscErrorCode ierr;
1330d0f46423SBarry Smith   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1331d0f46423SBarry Smith   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1332899cda47SBarry Smith   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1333acdf5bf4SSatish Balay 
1334d64ed03dSBarry Smith   PetscFunctionBegin;
1335e7e72b3dSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1336e32f2f54SBarry Smith   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1337acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1338acdf5bf4SSatish Balay 
1339acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1340acdf5bf4SSatish Balay     /*
1341acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1342acdf5bf4SSatish Balay     */
1343acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1344b24ad042SBarry Smith     PetscInt    max = 1,mbs = mat->mbs,tmp;
1345bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1346acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
134726fbe8dcSKarl Rupp       if (max < tmp) max = tmp;
1348acdf5bf4SSatish Balay     }
1349dcca6d9dSJed Brown     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1350acdf5bf4SSatish Balay   }
1351d9d09a02SSatish Balay   lrow = row - brstart;
1352acdf5bf4SSatish Balay 
1353acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1354acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1355acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1356f830108cSBarry Smith   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1357f830108cSBarry Smith   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1358acdf5bf4SSatish Balay   nztot = nzA + nzB;
1359acdf5bf4SSatish Balay 
1360acdf5bf4SSatish Balay   cmap = mat->garray;
1361acdf5bf4SSatish Balay   if (v  || idx) {
1362acdf5bf4SSatish Balay     if (nztot) {
1363acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1364b24ad042SBarry Smith       PetscInt imark = -1;
1365acdf5bf4SSatish Balay       if (v) {
1366acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1367acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1368d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1369acdf5bf4SSatish Balay           else break;
1370acdf5bf4SSatish Balay         }
1371acdf5bf4SSatish Balay         imark = i;
1372acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1373acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1374acdf5bf4SSatish Balay       }
1375acdf5bf4SSatish Balay       if (idx) {
1376acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1377acdf5bf4SSatish Balay         if (imark > -1) {
1378acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1379bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1380acdf5bf4SSatish Balay           }
1381acdf5bf4SSatish Balay         } else {
1382acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
138326fbe8dcSKarl Rupp             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1384acdf5bf4SSatish Balay             else break;
1385acdf5bf4SSatish Balay           }
1386acdf5bf4SSatish Balay           imark = i;
1387acdf5bf4SSatish Balay         }
1388d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1389d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1390acdf5bf4SSatish Balay       }
1391d64ed03dSBarry Smith     } else {
1392d212a18eSSatish Balay       if (idx) *idx = 0;
1393d212a18eSSatish Balay       if (v)   *v   = 0;
1394d212a18eSSatish Balay     }
1395acdf5bf4SSatish Balay   }
1396acdf5bf4SSatish Balay   *nz  = nztot;
1397f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1398f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
13993a40ed3dSBarry Smith   PetscFunctionReturn(0);
1400acdf5bf4SSatish Balay }
1401acdf5bf4SSatish Balay 
1402b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1403acdf5bf4SSatish Balay {
1404acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1405d64ed03dSBarry Smith 
1406d64ed03dSBarry Smith   PetscFunctionBegin;
1407e7e72b3dSBarry Smith   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1408acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
14093a40ed3dSBarry Smith   PetscFunctionReturn(0);
1410acdf5bf4SSatish Balay }
1411acdf5bf4SSatish Balay 
1412dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
141358667388SSatish Balay {
141458667388SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1415dfbe8321SBarry Smith   PetscErrorCode ierr;
1416d64ed03dSBarry Smith 
1417d64ed03dSBarry Smith   PetscFunctionBegin;
141858667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
141958667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
14203a40ed3dSBarry Smith   PetscFunctionReturn(0);
142158667388SSatish Balay }
14220ac07820SSatish Balay 
1423dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14240ac07820SSatish Balay {
14254e220ebcSLois Curfman McInnes   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
14264e220ebcSLois Curfman McInnes   Mat            A  = a->A,B = a->B;
1427dfbe8321SBarry Smith   PetscErrorCode ierr;
14283966268fSBarry Smith   PetscLogDouble isend[5],irecv[5];
14290ac07820SSatish Balay 
1430d64ed03dSBarry Smith   PetscFunctionBegin;
1431d0f46423SBarry Smith   info->block_size = (PetscReal)matin->rmap->bs;
143226fbe8dcSKarl Rupp 
14334e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
143426fbe8dcSKarl Rupp 
14350e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1436de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
143726fbe8dcSKarl Rupp 
14384e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
143926fbe8dcSKarl Rupp 
14400e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1441de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
144226fbe8dcSKarl Rupp 
14430ac07820SSatish Balay   if (flag == MAT_LOCAL) {
14444e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
14454e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
14464e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
14474e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14484e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14490ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
14503966268fSBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
145126fbe8dcSKarl Rupp 
14524e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14534e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14544e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14554e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14564e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14570ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
14583966268fSBarry Smith     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
145926fbe8dcSKarl Rupp 
14604e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14614e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14624e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14634e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14644e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1465ce94432eSBarry Smith   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
14664e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
14674e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
14684e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
14693a40ed3dSBarry Smith   PetscFunctionReturn(0);
14700ac07820SSatish Balay }
14710ac07820SSatish Balay 
1472ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
147358667388SSatish Balay {
147458667388SSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1475dfbe8321SBarry Smith   PetscErrorCode ierr;
147658667388SSatish Balay 
1477d64ed03dSBarry Smith   PetscFunctionBegin;
147812c028f9SKris Buschelman   switch (op) {
1479512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
148012c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
148128b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1482a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
148312c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
148443674050SBarry Smith     MatCheckPreallocated(A,1);
14854e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
14864e0d8c25SBarry Smith     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
148712c028f9SKris Buschelman     break;
148812c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
148943674050SBarry Smith     MatCheckPreallocated(A,1);
14904e0d8c25SBarry Smith     a->roworiented = flg;
149126fbe8dcSKarl Rupp 
14924e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
14934e0d8c25SBarry Smith     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
149412c028f9SKris Buschelman     break;
14954e0d8c25SBarry Smith   case MAT_NEW_DIAGONALS:
1496071fcb05SBarry Smith   case MAT_SORTED_FULL:
1497290bbb0aSBarry Smith     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
149812c028f9SKris Buschelman     break;
149912c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
15004e0d8c25SBarry Smith     a->donotstash = flg;
150112c028f9SKris Buschelman     break;
150212c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
15034e0d8c25SBarry Smith     a->ht_flag = flg;
1504abf3b562SBarry Smith     a->ht_fact = 1.39;
150512c028f9SKris Buschelman     break;
150677e54ba9SKris Buschelman   case MAT_SYMMETRIC:
150777e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
15082188ac68SBarry Smith   case MAT_HERMITIAN:
1509c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
15102188ac68SBarry Smith   case MAT_SYMMETRY_ETERNAL:
151143674050SBarry Smith     MatCheckPreallocated(A,1);
15124e0d8c25SBarry Smith     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
151377e54ba9SKris Buschelman     break;
151412c028f9SKris Buschelman   default:
1515ce94432eSBarry Smith     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1516d64ed03dSBarry Smith   }
15173a40ed3dSBarry Smith   PetscFunctionReturn(0);
151858667388SSatish Balay }
151958667388SSatish Balay 
1520fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
15210ac07820SSatish Balay {
15220ac07820SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
15230ac07820SSatish Balay   Mat_SeqBAIJ    *Aloc;
15240ac07820SSatish Balay   Mat            B;
1525dfbe8321SBarry Smith   PetscErrorCode ierr;
1526d0f46423SBarry Smith   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1527d0f46423SBarry Smith   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
15283eda8832SBarry Smith   MatScalar      *a;
15290ac07820SSatish Balay 
1530d64ed03dSBarry Smith   PetscFunctionBegin;
1531cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1532ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1533d0f46423SBarry Smith     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
15347adad957SLisandro Dalcin     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
15352e72b8d9SBarry Smith     /* Do not know preallocation information, but must set block size */
15360298fd71SBarry Smith     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1537fc4dec0aSBarry Smith   } else {
1538fc4dec0aSBarry Smith     B = *matout;
1539fc4dec0aSBarry Smith   }
15400ac07820SSatish Balay 
15410ac07820SSatish Balay   /* copy over the A part */
15420ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
15430ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1544785e854fSJed Brown   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
15450ac07820SSatish Balay 
15460ac07820SSatish Balay   for (i=0; i<mbs; i++) {
1547899cda47SBarry Smith     rvals[0] = bs*(baij->rstartbs + i);
154826fbe8dcSKarl Rupp     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
15490ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1550899cda47SBarry Smith       col = (baij->cstartbs+aj[j])*bs;
15510ac07820SSatish Balay       for (k=0; k<bs; k++) {
155297e5c40aSBarry Smith         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
155326fbe8dcSKarl Rupp 
15540ac07820SSatish Balay         col++; a += bs;
15550ac07820SSatish Balay       }
15560ac07820SSatish Balay     }
15570ac07820SSatish Balay   }
15580ac07820SSatish Balay   /* copy over the B part */
15590ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
15600ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
15610ac07820SSatish Balay   for (i=0; i<mbs; i++) {
1562899cda47SBarry Smith     rvals[0] = bs*(baij->rstartbs + i);
156326fbe8dcSKarl Rupp     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
15640ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
15650ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
15660ac07820SSatish Balay       for (k=0; k<bs; k++) {
156797e5c40aSBarry Smith         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
156826fbe8dcSKarl Rupp         col++;
156926fbe8dcSKarl Rupp         a += bs;
15700ac07820SSatish Balay       }
15710ac07820SSatish Balay     }
15720ac07820SSatish Balay   }
1573606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
15740ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15750ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15760ac07820SSatish Balay 
1577cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
157826fbe8dcSKarl Rupp   else {
157928be2f97SBarry Smith     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
15800ac07820SSatish Balay   }
15813a40ed3dSBarry Smith   PetscFunctionReturn(0);
15820ac07820SSatish Balay }
15830e95ebc0SSatish Balay 
1584dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
15850e95ebc0SSatish Balay {
158636c4a09eSSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
158736c4a09eSSatish Balay   Mat            a     = baij->A,b = baij->B;
1588dfbe8321SBarry Smith   PetscErrorCode ierr;
1589b24ad042SBarry Smith   PetscInt       s1,s2,s3;
15900e95ebc0SSatish Balay 
1591d64ed03dSBarry Smith   PetscFunctionBegin;
159236c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
159336c4a09eSSatish Balay   if (rr) {
159436c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1595e32f2f54SBarry Smith     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
159636c4a09eSSatish Balay     /* Overlap communication with computation. */
1597ca9f406cSSatish Balay     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
159836c4a09eSSatish Balay   }
15990e95ebc0SSatish Balay   if (ll) {
16000e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1601e32f2f54SBarry Smith     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
16020298fd71SBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
16030e95ebc0SSatish Balay   }
160436c4a09eSSatish Balay   /* scale  the diagonal block */
160536c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
160636c4a09eSSatish Balay 
160736c4a09eSSatish Balay   if (rr) {
160836c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
1609ca9f406cSSatish Balay     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
16100298fd71SBarry Smith     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
161136c4a09eSSatish Balay   }
16123a40ed3dSBarry Smith   PetscFunctionReturn(0);
16130e95ebc0SSatish Balay }
16140e95ebc0SSatish Balay 
16152b40b63fSBarry Smith PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
16160ac07820SSatish Balay {
16170ac07820SSatish Balay   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
161865a92638SMatthew G. Knepley   PetscInt      *lrows;
16196e520ac8SStefano Zampini   PetscInt       r, len;
162094342113SStefano Zampini   PetscBool      cong;
16216849ba73SBarry Smith   PetscErrorCode ierr;
16220ac07820SSatish Balay 
1623d64ed03dSBarry Smith   PetscFunctionBegin;
16246e520ac8SStefano Zampini   /* get locally owned rows */
16256e520ac8SStefano Zampini   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
162697b48c8fSBarry Smith   /* fix right hand side if needed */
162797b48c8fSBarry Smith   if (x && b) {
162865a92638SMatthew G. Knepley     const PetscScalar *xx;
162965a92638SMatthew G. Knepley     PetscScalar       *bb;
163065a92638SMatthew G. Knepley 
163197b48c8fSBarry Smith     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
163297b48c8fSBarry Smith     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
163365a92638SMatthew G. Knepley     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
163497b48c8fSBarry Smith     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
163597b48c8fSBarry Smith     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
163697b48c8fSBarry Smith   }
163797b48c8fSBarry Smith 
16380ac07820SSatish Balay   /* actually zap the local rows */
163972dacd9aSBarry Smith   /*
164072dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
1641a8c7a070SBarry Smith      is square and the user wishes to set the diagonal we use separate
164272dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
164372dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
164472dacd9aSBarry Smith 
164572dacd9aSBarry Smith   */
16469c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1647a34163a4SJed Brown   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
164894342113SStefano Zampini   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
164994342113SStefano Zampini   if ((diag != 0.0) && cong) {
1650a34163a4SJed Brown     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1651f4df32b1SMatthew Knepley   } else if (diag != 0.0) {
165265a92638SMatthew G. Knepley     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1653e7e72b3dSBarry Smith     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1654512a5fc5SBarry Smith        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
165565a92638SMatthew G. Knepley     for (r = 0; r < len; ++r) {
165665a92638SMatthew G. Knepley       const PetscInt row = lrows[r] + A->rmap->rstart;
1657f4df32b1SMatthew Knepley       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1658a07cd24cSSatish Balay     }
1659a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1660a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16619c957beeSSatish Balay   } else {
1662a34163a4SJed Brown     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1663a07cd24cSSatish Balay   }
1664606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
16654f9cfa9eSBarry Smith 
16664f9cfa9eSBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
16674f9cfa9eSBarry Smith   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1668e56f5c9eSBarry Smith     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1669b2566f29SBarry Smith     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1670e56f5c9eSBarry Smith   }
16713a40ed3dSBarry Smith   PetscFunctionReturn(0);
16720ac07820SSatish Balay }
167372dacd9aSBarry Smith 
16746f0a72daSMatthew G. Knepley PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
16756f0a72daSMatthew G. Knepley {
16766f0a72daSMatthew G. Knepley   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
16776f0a72daSMatthew G. Knepley   PetscErrorCode    ierr;
1678131c27b5Sprj-   PetscMPIInt       n = A->rmap->n,p = 0;
1679131c27b5Sprj-   PetscInt          i,j,k,r,len = 0,row,col,count;
16806f0a72daSMatthew G. Knepley   PetscInt          *lrows,*owners = A->rmap->range;
16816f0a72daSMatthew G. Knepley   PetscSFNode       *rrows;
16826f0a72daSMatthew G. Knepley   PetscSF           sf;
16836f0a72daSMatthew G. Knepley   const PetscScalar *xx;
16846f0a72daSMatthew G. Knepley   PetscScalar       *bb,*mask;
16856f0a72daSMatthew G. Knepley   Vec               xmask,lmask;
16866f0a72daSMatthew G. Knepley   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
16876f0a72daSMatthew G. Knepley   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
16886f0a72daSMatthew G. Knepley   PetscScalar       *aa;
16896f0a72daSMatthew G. Knepley 
16906f0a72daSMatthew G. Knepley   PetscFunctionBegin;
16916f0a72daSMatthew G. Knepley   /* Create SF where leaves are input rows and roots are owned rows */
16926f0a72daSMatthew G. Knepley   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
16936f0a72daSMatthew G. Knepley   for (r = 0; r < n; ++r) lrows[r] = -1;
16946f0a72daSMatthew G. Knepley   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
16956f0a72daSMatthew G. Knepley   for (r = 0; r < N; ++r) {
16966f0a72daSMatthew G. Knepley     const PetscInt idx   = rows[r];
16975ba17502SJed Brown     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
16985ba17502SJed Brown     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
16995ba17502SJed Brown       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
17005ba17502SJed Brown     }
17016f0a72daSMatthew G. Knepley     rrows[r].rank  = p;
17026f0a72daSMatthew G. Knepley     rrows[r].index = rows[r] - owners[p];
17036f0a72daSMatthew G. Knepley   }
17046f0a72daSMatthew G. Knepley   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
17056f0a72daSMatthew G. Knepley   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
17066f0a72daSMatthew G. Knepley   /* Collect flags for rows to be zeroed */
17076f0a72daSMatthew G. Knepley   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
17086f0a72daSMatthew G. Knepley   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
17096f0a72daSMatthew G. Knepley   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
17106f0a72daSMatthew G. Knepley   /* Compress and put in row numbers */
17116f0a72daSMatthew G. Knepley   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
17126f0a72daSMatthew G. Knepley   /* zero diagonal part of matrix */
17136f0a72daSMatthew G. Knepley   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
17146f0a72daSMatthew G. Knepley   /* handle off diagonal part of matrix */
17152a7a6963SBarry Smith   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
17166f0a72daSMatthew G. Knepley   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
17176f0a72daSMatthew G. Knepley   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
17186f0a72daSMatthew G. Knepley   for (i=0; i<len; i++) bb[lrows[i]] = 1;
17196f0a72daSMatthew G. Knepley   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
17206f0a72daSMatthew G. Knepley   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
17216f0a72daSMatthew G. Knepley   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
17226f0a72daSMatthew G. Knepley   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
17236f0a72daSMatthew G. Knepley   if (x) {
17246f0a72daSMatthew G. Knepley     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
17256f0a72daSMatthew G. Knepley     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
17266f0a72daSMatthew G. Knepley     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
17276f0a72daSMatthew G. Knepley     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
17286f0a72daSMatthew G. Knepley   }
17296f0a72daSMatthew G. Knepley   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
17306f0a72daSMatthew G. Knepley   /* remove zeroed rows of off diagonal matrix */
17316f0a72daSMatthew G. Knepley   for (i = 0; i < len; ++i) {
17326f0a72daSMatthew G. Knepley     row   = lrows[i];
17336f0a72daSMatthew G. Knepley     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
17346f0a72daSMatthew G. Knepley     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
17356f0a72daSMatthew G. Knepley     for (k = 0; k < count; ++k) {
17366f0a72daSMatthew G. Knepley       aa[0] = 0.0;
17376f0a72daSMatthew G. Knepley       aa   += bs;
17386f0a72daSMatthew G. Knepley     }
17396f0a72daSMatthew G. Knepley   }
17406f0a72daSMatthew G. Knepley   /* loop over all elements of off process part of matrix zeroing removed columns*/
17416f0a72daSMatthew G. Knepley   for (i = 0; i < l->B->rmap->N; ++i) {
17426f0a72daSMatthew G. Knepley     row = i/bs;
17436f0a72daSMatthew G. Knepley     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
17446f0a72daSMatthew G. Knepley       for (k = 0; k < bs; ++k) {
17456f0a72daSMatthew G. Knepley         col = bs*baij->j[j] + k;
17466f0a72daSMatthew G. Knepley         if (PetscAbsScalar(mask[col])) {
17476f0a72daSMatthew G. Knepley           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
174889ae1891SBarry Smith           if (x) bb[i] -= aa[0]*xx[col];
17496f0a72daSMatthew G. Knepley           aa[0] = 0.0;
17506f0a72daSMatthew G. Knepley         }
17516f0a72daSMatthew G. Knepley       }
17526f0a72daSMatthew G. Knepley     }
17536f0a72daSMatthew G. Knepley   }
17546f0a72daSMatthew G. Knepley   if (x) {
17556f0a72daSMatthew G. Knepley     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
17566f0a72daSMatthew G. Knepley     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
17576f0a72daSMatthew G. Knepley   }
17586f0a72daSMatthew G. Knepley   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
17596f0a72daSMatthew G. Knepley   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
17606f0a72daSMatthew G. Knepley   ierr = PetscFree(lrows);CHKERRQ(ierr);
17614f9cfa9eSBarry Smith 
17624f9cfa9eSBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
17634f9cfa9eSBarry Smith   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
17644f9cfa9eSBarry Smith     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1765b2566f29SBarry Smith     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
17664f9cfa9eSBarry Smith   }
17676f0a72daSMatthew G. Knepley   PetscFunctionReturn(0);
17686f0a72daSMatthew G. Knepley }
17696f0a72daSMatthew G. Knepley 
1770dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1771bb5a7306SBarry Smith {
1772bb5a7306SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1773dfbe8321SBarry Smith   PetscErrorCode ierr;
1774d64ed03dSBarry Smith 
1775d64ed03dSBarry Smith   PetscFunctionBegin;
1776bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
17773a40ed3dSBarry Smith   PetscFunctionReturn(0);
1778bb5a7306SBarry Smith }
1779bb5a7306SBarry Smith 
17806849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
17810ac07820SSatish Balay 
1782ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
17837fc3c18eSBarry Smith {
17847fc3c18eSBarry Smith   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
17857fc3c18eSBarry Smith   Mat            a,b,c,d;
1786ace3abfcSBarry Smith   PetscBool      flg;
1787dfbe8321SBarry Smith   PetscErrorCode ierr;
17887fc3c18eSBarry Smith 
17897fc3c18eSBarry Smith   PetscFunctionBegin;
17907fc3c18eSBarry Smith   a = matA->A; b = matA->B;
17917fc3c18eSBarry Smith   c = matB->A; d = matB->B;
17927fc3c18eSBarry Smith 
17937fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1794abc0a331SBarry Smith   if (flg) {
17957fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
17967fc3c18eSBarry Smith   }
1797b2566f29SBarry Smith   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
17987fc3c18eSBarry Smith   PetscFunctionReturn(0);
17997fc3c18eSBarry Smith }
18007fc3c18eSBarry Smith 
18013c896bc6SHong Zhang PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
18023c896bc6SHong Zhang {
18033c896bc6SHong Zhang   PetscErrorCode ierr;
18043c896bc6SHong Zhang   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
18053c896bc6SHong Zhang   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
18063c896bc6SHong Zhang 
18073c896bc6SHong Zhang   PetscFunctionBegin;
18083c896bc6SHong Zhang   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
18093c896bc6SHong Zhang   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
18103c896bc6SHong Zhang     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
18113c896bc6SHong Zhang   } else {
18123c896bc6SHong Zhang     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
18133c896bc6SHong Zhang     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
18143c896bc6SHong Zhang   }
1815cdc753b6SBarry Smith   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
18163c896bc6SHong Zhang   PetscFunctionReturn(0);
18173c896bc6SHong Zhang }
1818273d9f13SBarry Smith 
18194994cf47SJed Brown PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1820273d9f13SBarry Smith {
1821dfbe8321SBarry Smith   PetscErrorCode ierr;
1822273d9f13SBarry Smith 
1823273d9f13SBarry Smith   PetscFunctionBegin;
1824535b19f3SBarry Smith   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1825273d9f13SBarry Smith   PetscFunctionReturn(0);
1826273d9f13SBarry Smith }
1827273d9f13SBarry Smith 
18284de5dceeSHong Zhang PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
18294de5dceeSHong Zhang {
1830001ddc4fSHong Zhang   PetscErrorCode ierr;
1831001ddc4fSHong Zhang   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
18324de5dceeSHong Zhang   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
18334de5dceeSHong Zhang   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
18344de5dceeSHong Zhang 
18354de5dceeSHong Zhang   PetscFunctionBegin;
1836001ddc4fSHong Zhang   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
18374de5dceeSHong Zhang   PetscFunctionReturn(0);
18384de5dceeSHong Zhang }
18394de5dceeSHong Zhang 
18404fe895cdSHong Zhang PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
18414fe895cdSHong Zhang {
18424fe895cdSHong Zhang   PetscErrorCode ierr;
18434fe895cdSHong Zhang   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
18444fe895cdSHong Zhang   PetscBLASInt   bnz,one=1;
18454fe895cdSHong Zhang   Mat_SeqBAIJ    *x,*y;
1846b31f67cfSBarry Smith   PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;
18474fe895cdSHong Zhang 
18484fe895cdSHong Zhang   PetscFunctionBegin;
18494fe895cdSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
18504fe895cdSHong Zhang     PetscScalar alpha = a;
18514fe895cdSHong Zhang     x    = (Mat_SeqBAIJ*)xx->A->data;
18524fe895cdSHong Zhang     y    = (Mat_SeqBAIJ*)yy->A->data;
1853b31f67cfSBarry Smith     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
18548b83055fSJed Brown     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
18554fe895cdSHong Zhang     x    = (Mat_SeqBAIJ*)xx->B->data;
18564fe895cdSHong Zhang     y    = (Mat_SeqBAIJ*)yy->B->data;
1857b31f67cfSBarry Smith     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
18588b83055fSJed Brown     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1859a3fa217bSJose E. Roman     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1860ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1861ab784542SHong Zhang     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
18624fe895cdSHong Zhang   } else {
18634de5dceeSHong Zhang     Mat      B;
18644de5dceeSHong Zhang     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
18654de5dceeSHong Zhang     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
18664de5dceeSHong Zhang     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
18674de5dceeSHong Zhang     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
18684de5dceeSHong Zhang     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
18694de5dceeSHong Zhang     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
18704de5dceeSHong Zhang     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
18714de5dceeSHong Zhang     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
18724de5dceeSHong Zhang     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
18734de5dceeSHong Zhang     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
18744de5dceeSHong Zhang     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
18754de5dceeSHong Zhang     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
18764de5dceeSHong Zhang     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
187728be2f97SBarry Smith     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
18784de5dceeSHong Zhang     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
18794de5dceeSHong Zhang     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
18804fe895cdSHong Zhang   }
18814fe895cdSHong Zhang   PetscFunctionReturn(0);
18824fe895cdSHong Zhang }
18834fe895cdSHong Zhang 
188499cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
188599cafbc1SBarry Smith {
188699cafbc1SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
188799cafbc1SBarry Smith   PetscErrorCode ierr;
188899cafbc1SBarry Smith 
188999cafbc1SBarry Smith   PetscFunctionBegin;
189099cafbc1SBarry Smith   ierr = MatRealPart(a->A);CHKERRQ(ierr);
189199cafbc1SBarry Smith   ierr = MatRealPart(a->B);CHKERRQ(ierr);
189299cafbc1SBarry Smith   PetscFunctionReturn(0);
189399cafbc1SBarry Smith }
189499cafbc1SBarry Smith 
189599cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
189699cafbc1SBarry Smith {
189799cafbc1SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
189899cafbc1SBarry Smith   PetscErrorCode ierr;
189999cafbc1SBarry Smith 
190099cafbc1SBarry Smith   PetscFunctionBegin;
190199cafbc1SBarry Smith   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
190299cafbc1SBarry Smith   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
190399cafbc1SBarry Smith   PetscFunctionReturn(0);
190499cafbc1SBarry Smith }
190599cafbc1SBarry Smith 
19067dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
19074aa3045dSJed Brown {
19084aa3045dSJed Brown   PetscErrorCode ierr;
19094aa3045dSJed Brown   IS             iscol_local;
19104aa3045dSJed Brown   PetscInt       csize;
19114aa3045dSJed Brown 
19124aa3045dSJed Brown   PetscFunctionBegin;
19134aa3045dSJed Brown   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1914b79d0421SJed Brown   if (call == MAT_REUSE_MATRIX) {
1915b79d0421SJed Brown     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1916e32f2f54SBarry Smith     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1917b79d0421SJed Brown   } else {
19184aa3045dSJed Brown     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1919b79d0421SJed Brown   }
19207dae84e0SHong Zhang   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1921b79d0421SJed Brown   if (call == MAT_INITIAL_MATRIX) {
1922b79d0421SJed Brown     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
19236bf464f9SBarry Smith     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1924b79d0421SJed Brown   }
19254aa3045dSJed Brown   PetscFunctionReturn(0);
19264aa3045dSJed Brown }
192717df9f7cSHong Zhang 
192882094794SBarry Smith /*
192982094794SBarry Smith   Not great since it makes two copies of the submatrix, first an SeqBAIJ
193082094794SBarry Smith   in local and then by concatenating the local matrices the end result.
19317dae84e0SHong Zhang   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
19328f46ffcaSHong Zhang   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
193382094794SBarry Smith */
19347dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
193582094794SBarry Smith {
193682094794SBarry Smith   PetscErrorCode ierr;
193782094794SBarry Smith   PetscMPIInt    rank,size;
193882094794SBarry Smith   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1939c9ffca76SHong Zhang   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
194029dcf524SDmitry Karpeev   Mat            M,Mreuse;
194182094794SBarry Smith   MatScalar      *vwork,*aa;
1942ce94432eSBarry Smith   MPI_Comm       comm;
194329dcf524SDmitry Karpeev   IS             isrow_new, iscol_new;
194482094794SBarry Smith   Mat_SeqBAIJ    *aij;
194582094794SBarry Smith 
194682094794SBarry Smith   PetscFunctionBegin;
1947ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
194882094794SBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
194982094794SBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
195029dcf524SDmitry Karpeev   /* The compression and expansion should be avoided. Doesn't point
195129dcf524SDmitry Karpeev      out errors, might change the indices, hence buggey */
195229dcf524SDmitry Karpeev   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
195329dcf524SDmitry Karpeev   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
195482094794SBarry Smith 
195582094794SBarry Smith   if (call ==  MAT_REUSE_MATRIX) {
195682094794SBarry Smith     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
1957e32f2f54SBarry Smith     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
19587dae84e0SHong Zhang     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
195982094794SBarry Smith   } else {
19607dae84e0SHong Zhang     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
196182094794SBarry Smith   }
196229dcf524SDmitry Karpeev   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
196329dcf524SDmitry Karpeev   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
196482094794SBarry Smith   /*
196582094794SBarry Smith       m - number of local rows
196682094794SBarry Smith       n - number of columns (same on all processors)
196782094794SBarry Smith       rstart - first row in new global matrix generated
196882094794SBarry Smith   */
196982094794SBarry Smith   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
197082094794SBarry Smith   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
197182094794SBarry Smith   m    = m/bs;
197282094794SBarry Smith   n    = n/bs;
197382094794SBarry Smith 
197482094794SBarry Smith   if (call == MAT_INITIAL_MATRIX) {
197582094794SBarry Smith     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
197682094794SBarry Smith     ii  = aij->i;
197782094794SBarry Smith     jj  = aij->j;
197882094794SBarry Smith 
197982094794SBarry Smith     /*
198082094794SBarry Smith         Determine the number of non-zeros in the diagonal and off-diagonal
198182094794SBarry Smith         portions of the matrix in order to do correct preallocation
198282094794SBarry Smith     */
198382094794SBarry Smith 
198482094794SBarry Smith     /* first get start and end of "diagonal" columns */
198582094794SBarry Smith     if (csize == PETSC_DECIDE) {
198682094794SBarry Smith       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
198782094794SBarry Smith       if (mglobal == n*bs) { /* square matrix */
198882094794SBarry Smith         nlocal = m;
198982094794SBarry Smith       } else {
199082094794SBarry Smith         nlocal = n/size + ((n % size) > rank);
199182094794SBarry Smith       }
199282094794SBarry Smith     } else {
199382094794SBarry Smith       nlocal = csize/bs;
199482094794SBarry Smith     }
199582094794SBarry Smith     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
199682094794SBarry Smith     rstart = rend - nlocal;
199765e19b50SBarry Smith     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
199882094794SBarry Smith 
199982094794SBarry Smith     /* next, compute all the lengths */
2000dcca6d9dSJed Brown     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
200182094794SBarry Smith     for (i=0; i<m; i++) {
200282094794SBarry Smith       jend = ii[i+1] - ii[i];
200382094794SBarry Smith       olen = 0;
200482094794SBarry Smith       dlen = 0;
200582094794SBarry Smith       for (j=0; j<jend; j++) {
200682094794SBarry Smith         if (*jj < rstart || *jj >= rend) olen++;
200782094794SBarry Smith         else dlen++;
200882094794SBarry Smith         jj++;
200982094794SBarry Smith       }
201082094794SBarry Smith       olens[i] = olen;
201182094794SBarry Smith       dlens[i] = dlen;
201282094794SBarry Smith     }
201382094794SBarry Smith     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
201482094794SBarry Smith     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
201582094794SBarry Smith     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
201682094794SBarry Smith     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
20178f46ffcaSHong Zhang     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2018eb9baa12SBarry Smith     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
201982094794SBarry Smith   } else {
202082094794SBarry Smith     PetscInt ml,nl;
202182094794SBarry Smith 
202282094794SBarry Smith     M    = *newmat;
202382094794SBarry Smith     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2024e32f2f54SBarry Smith     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
202582094794SBarry Smith     ierr = MatZeroEntries(M);CHKERRQ(ierr);
202682094794SBarry Smith     /*
202782094794SBarry Smith          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
202882094794SBarry Smith        rather than the slower MatSetValues().
202982094794SBarry Smith     */
203082094794SBarry Smith     M->was_assembled = PETSC_TRUE;
203182094794SBarry Smith     M->assembled     = PETSC_FALSE;
203282094794SBarry Smith   }
203382094794SBarry Smith   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
203482094794SBarry Smith   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
203582094794SBarry Smith   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
203682094794SBarry Smith   ii   = aij->i;
203782094794SBarry Smith   jj   = aij->j;
203882094794SBarry Smith   aa   = aij->a;
203982094794SBarry Smith   for (i=0; i<m; i++) {
204082094794SBarry Smith     row   = rstart/bs + i;
204182094794SBarry Smith     nz    = ii[i+1] - ii[i];
204282094794SBarry Smith     cwork = jj;     jj += nz;
204375f6568bSJed Brown     vwork = aa;     aa += nz*bs*bs;
204482094794SBarry Smith     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
204582094794SBarry Smith   }
204682094794SBarry Smith 
204782094794SBarry Smith   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
204882094794SBarry Smith   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
204982094794SBarry Smith   *newmat = M;
205082094794SBarry Smith 
205182094794SBarry Smith   /* save submatrix used in processor for next request */
205282094794SBarry Smith   if (call ==  MAT_INITIAL_MATRIX) {
205382094794SBarry Smith     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
205482094794SBarry Smith     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
205582094794SBarry Smith   }
205682094794SBarry Smith   PetscFunctionReturn(0);
205782094794SBarry Smith }
205882094794SBarry Smith 
205982094794SBarry Smith PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
206082094794SBarry Smith {
206182094794SBarry Smith   MPI_Comm       comm,pcomm;
2062a0a83eb5SRémi Lacroix   PetscInt       clocal_size,nrows;
206382094794SBarry Smith   const PetscInt *rows;
2064dbf0e21dSBarry Smith   PetscMPIInt    size;
2065a0a83eb5SRémi Lacroix   IS             crowp,lcolp;
206682094794SBarry Smith   PetscErrorCode ierr;
206782094794SBarry Smith 
206882094794SBarry Smith   PetscFunctionBegin;
206982094794SBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
207082094794SBarry Smith   /* make a collective version of 'rowp' */
207182094794SBarry Smith   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
207282094794SBarry Smith   if (pcomm==comm) {
207382094794SBarry Smith     crowp = rowp;
207482094794SBarry Smith   } else {
207582094794SBarry Smith     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
207682094794SBarry Smith     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
207770b3c8c7SBarry Smith     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
207882094794SBarry Smith     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
207982094794SBarry Smith   }
2080a0a83eb5SRémi Lacroix   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2081a0a83eb5SRémi Lacroix   /* make a local version of 'colp' */
208282094794SBarry Smith   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2083dbf0e21dSBarry Smith   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2084dbf0e21dSBarry Smith   if (size==1) {
208582094794SBarry Smith     lcolp = colp;
208682094794SBarry Smith   } else {
208775f6568bSJed Brown     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
208882094794SBarry Smith   }
2089dbf0e21dSBarry Smith   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
209075f6568bSJed Brown   /* now we just get the submatrix */
20917afc1a8bSJed Brown   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
20927dae84e0SHong Zhang   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2093a0a83eb5SRémi Lacroix   /* clean up */
2094a0a83eb5SRémi Lacroix   if (pcomm!=comm) {
2095a0a83eb5SRémi Lacroix     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2096a0a83eb5SRémi Lacroix   }
2097dbf0e21dSBarry Smith   if (size>1) {
20986bf464f9SBarry Smith     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
209982094794SBarry Smith   }
210082094794SBarry Smith   PetscFunctionReturn(0);
210182094794SBarry Smith }
210282094794SBarry Smith 
21037087cfbeSBarry Smith PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
21048c7482ecSBarry Smith {
21058c7482ecSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
21068c7482ecSBarry Smith   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
21078c7482ecSBarry Smith 
21088c7482ecSBarry Smith   PetscFunctionBegin;
210926fbe8dcSKarl Rupp   if (nghosts) *nghosts = B->nbs;
211026fbe8dcSKarl Rupp   if (ghosts) *ghosts = baij->garray;
21118c7482ecSBarry Smith   PetscFunctionReturn(0);
21128c7482ecSBarry Smith }
21138c7482ecSBarry Smith 
2114d1adec66SJed Brown PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2115f6d58c54SBarry Smith {
2116f6d58c54SBarry Smith   Mat            B;
2117f6d58c54SBarry Smith   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2118f6d58c54SBarry Smith   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2119f6d58c54SBarry Smith   Mat_SeqAIJ     *b;
2120f6d58c54SBarry Smith   PetscErrorCode ierr;
2121f6d58c54SBarry Smith   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2122f6d58c54SBarry Smith   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2123f6d58c54SBarry Smith   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2124f6d58c54SBarry Smith 
2125f6d58c54SBarry Smith   PetscFunctionBegin;
2126ce94432eSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2127ce94432eSBarry Smith   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2128f6d58c54SBarry Smith 
2129f6d58c54SBarry Smith   /* ----------------------------------------------------------------
2130f6d58c54SBarry Smith      Tell every processor the number of nonzeros per row
2131f6d58c54SBarry Smith   */
2132854ce69bSBarry Smith   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2133f6d58c54SBarry Smith   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2134f6d58c54SBarry Smith     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2135f6d58c54SBarry Smith   }
2136785e854fSJed Brown   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2137f6d58c54SBarry Smith   displs    = recvcounts + size;
2138f6d58c54SBarry Smith   for (i=0; i<size; i++) {
2139f6d58c54SBarry Smith     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2140f6d58c54SBarry Smith     displs[i]     = A->rmap->range[i]/bs;
2141f6d58c54SBarry Smith   }
2142f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE)
2143ce94432eSBarry Smith   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2144f6d58c54SBarry Smith #else
21453d3eaba7SBarry Smith   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2146ce94432eSBarry Smith   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2147f6d58c54SBarry Smith #endif
2148f6d58c54SBarry Smith   /* ---------------------------------------------------------------
2149f6d58c54SBarry Smith      Create the sequential matrix of the same type as the local block diagonal
2150f6d58c54SBarry Smith   */
2151f6d58c54SBarry Smith   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2152f6d58c54SBarry Smith   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2153f6d58c54SBarry Smith   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2154f6d58c54SBarry Smith   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2155f6d58c54SBarry Smith   b    = (Mat_SeqAIJ*)B->data;
2156f6d58c54SBarry Smith 
2157f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2158f6d58c54SBarry Smith     Copy my part of matrix column indices over
2159f6d58c54SBarry Smith   */
2160f6d58c54SBarry Smith   sendcount  = ad->nz + bd->nz;
2161f6d58c54SBarry Smith   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2162f6d58c54SBarry Smith   a_jsendbuf = ad->j;
2163f6d58c54SBarry Smith   b_jsendbuf = bd->j;
2164f6d58c54SBarry Smith   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2165f6d58c54SBarry Smith   cnt        = 0;
2166f6d58c54SBarry Smith   for (i=0; i<n; i++) {
2167f6d58c54SBarry Smith 
2168f6d58c54SBarry Smith     /* put in lower diagonal portion */
2169f6d58c54SBarry Smith     m = bd->i[i+1] - bd->i[i];
2170f6d58c54SBarry Smith     while (m > 0) {
2171f6d58c54SBarry Smith       /* is it above diagonal (in bd (compressed) numbering) */
2172f6d58c54SBarry Smith       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2173f6d58c54SBarry Smith       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2174f6d58c54SBarry Smith       m--;
2175f6d58c54SBarry Smith     }
2176f6d58c54SBarry Smith 
2177f6d58c54SBarry Smith     /* put in diagonal portion */
2178f6d58c54SBarry Smith     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2179f6d58c54SBarry Smith       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2180f6d58c54SBarry Smith     }
2181f6d58c54SBarry Smith 
2182f6d58c54SBarry Smith     /* put in upper diagonal portion */
2183f6d58c54SBarry Smith     while (m-- > 0) {
2184f6d58c54SBarry Smith       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2185f6d58c54SBarry Smith     }
2186f6d58c54SBarry Smith   }
2187e32f2f54SBarry Smith   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2188f6d58c54SBarry Smith 
2189f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2190f6d58c54SBarry Smith     Gather all column indices to all processors
2191f6d58c54SBarry Smith   */
2192f6d58c54SBarry Smith   for (i=0; i<size; i++) {
2193f6d58c54SBarry Smith     recvcounts[i] = 0;
2194f6d58c54SBarry Smith     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2195f6d58c54SBarry Smith       recvcounts[i] += lens[j];
2196f6d58c54SBarry Smith     }
2197f6d58c54SBarry Smith   }
2198f6d58c54SBarry Smith   displs[0] = 0;
2199f6d58c54SBarry Smith   for (i=1; i<size; i++) {
2200f6d58c54SBarry Smith     displs[i] = displs[i-1] + recvcounts[i-1];
2201f6d58c54SBarry Smith   }
2202f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE)
2203ce94432eSBarry Smith   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2204f6d58c54SBarry Smith #else
2205ce94432eSBarry Smith   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2206f6d58c54SBarry Smith #endif
2207f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2208f6d58c54SBarry Smith     Assemble the matrix into useable form (note numerical values not yet set)
2209f6d58c54SBarry Smith   */
2210f6d58c54SBarry Smith   /* set the b->ilen (length of each row) values */
2211580bdb30SBarry Smith   ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
2212f6d58c54SBarry Smith   /* set the b->i indices */
2213f6d58c54SBarry Smith   b->i[0] = 0;
2214f6d58c54SBarry Smith   for (i=1; i<=A->rmap->N/bs; i++) {
2215f6d58c54SBarry Smith     b->i[i] = b->i[i-1] + lens[i-1];
2216f6d58c54SBarry Smith   }
2217f6d58c54SBarry Smith   ierr = PetscFree(lens);CHKERRQ(ierr);
2218f6d58c54SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2219f6d58c54SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2220f6d58c54SBarry Smith   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2221f6d58c54SBarry Smith 
2222f6d58c54SBarry Smith   if (A->symmetric) {
2223f6d58c54SBarry Smith     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2224f6d58c54SBarry Smith   } else if (A->hermitian) {
2225f6d58c54SBarry Smith     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2226f6d58c54SBarry Smith   } else if (A->structurally_symmetric) {
2227f6d58c54SBarry Smith     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2228f6d58c54SBarry Smith   }
2229f6d58c54SBarry Smith   *newmat = B;
2230f6d58c54SBarry Smith   PetscFunctionReturn(0);
2231f6d58c54SBarry Smith }
2232f6d58c54SBarry Smith 
2233b1a666ecSBarry Smith PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2234b1a666ecSBarry Smith {
2235b1a666ecSBarry Smith   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2236b1a666ecSBarry Smith   PetscErrorCode ierr;
2237b1a666ecSBarry Smith   Vec            bb1 = 0;
2238b1a666ecSBarry Smith 
2239b1a666ecSBarry Smith   PetscFunctionBegin;
2240b1a666ecSBarry Smith   if (flag == SOR_APPLY_UPPER) {
2241b1a666ecSBarry Smith     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2242b1a666ecSBarry Smith     PetscFunctionReturn(0);
2243b1a666ecSBarry Smith   }
2244b1a666ecSBarry Smith 
22454e980039SJed Brown   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
22464e980039SJed Brown     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
22474e980039SJed Brown   }
22484e980039SJed Brown 
2249b1a666ecSBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2250b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
2251b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2252b1a666ecSBarry Smith       its--;
2253b1a666ecSBarry Smith     }
2254b1a666ecSBarry Smith 
2255b1a666ecSBarry Smith     while (its--) {
2256b1a666ecSBarry Smith       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2257b1a666ecSBarry Smith       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2258b1a666ecSBarry Smith 
2259b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
2260b1a666ecSBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2261b1a666ecSBarry Smith       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2262b1a666ecSBarry Smith 
2263b1a666ecSBarry Smith       /* local sweep */
2264b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2265b1a666ecSBarry Smith     }
2266b1a666ecSBarry Smith   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2267b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
2268b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2269b1a666ecSBarry Smith       its--;
2270b1a666ecSBarry Smith     }
2271b1a666ecSBarry Smith     while (its--) {
2272b1a666ecSBarry Smith       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2273b1a666ecSBarry Smith       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2274b1a666ecSBarry Smith 
2275b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
2276b1a666ecSBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2277b1a666ecSBarry Smith       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2278b1a666ecSBarry Smith 
2279b1a666ecSBarry Smith       /* local sweep */
2280b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2281b1a666ecSBarry Smith     }
2282b1a666ecSBarry Smith   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2283b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
2284b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2285b1a666ecSBarry Smith       its--;
2286b1a666ecSBarry Smith     }
2287b1a666ecSBarry Smith     while (its--) {
2288b1a666ecSBarry Smith       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2289b1a666ecSBarry Smith       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2290b1a666ecSBarry Smith 
2291b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
2292b1a666ecSBarry Smith       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2293b1a666ecSBarry Smith       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2294b1a666ecSBarry Smith 
2295b1a666ecSBarry Smith       /* local sweep */
2296b1a666ecSBarry Smith       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2297b1a666ecSBarry Smith     }
2298ce94432eSBarry Smith   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2299b1a666ecSBarry Smith 
23006bf464f9SBarry Smith   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2301b1a666ecSBarry Smith   PetscFunctionReturn(0);
2302b1a666ecSBarry Smith }
2303b1a666ecSBarry Smith 
230447f7623dSRémi Lacroix PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
230547f7623dSRémi Lacroix {
230647f7623dSRémi Lacroix   PetscErrorCode ierr;
230747f7623dSRémi Lacroix   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
230847f7623dSRémi Lacroix   PetscInt       N,i,*garray = aij->garray;
230947f7623dSRémi Lacroix   PetscInt       ib,jb,bs = A->rmap->bs;
231047f7623dSRémi Lacroix   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
231147f7623dSRémi Lacroix   MatScalar      *a_val = a_aij->a;
231247f7623dSRémi Lacroix   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
231347f7623dSRémi Lacroix   MatScalar      *b_val = b_aij->a;
231447f7623dSRémi Lacroix   PetscReal      *work;
231547f7623dSRémi Lacroix 
231647f7623dSRémi Lacroix   PetscFunctionBegin;
231747f7623dSRémi Lacroix   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
23181795a4d1SJed Brown   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
231947f7623dSRémi Lacroix   if (type == NORM_2) {
232047f7623dSRémi Lacroix     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
232147f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
232247f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
232347f7623dSRémi Lacroix           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
232447f7623dSRémi Lacroix           a_val++;
232547f7623dSRémi Lacroix         }
232647f7623dSRémi Lacroix       }
232747f7623dSRémi Lacroix     }
232847f7623dSRémi Lacroix     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
232947f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
233047f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
233147f7623dSRémi Lacroix           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
233247f7623dSRémi Lacroix           b_val++;
233347f7623dSRémi Lacroix         }
233447f7623dSRémi Lacroix       }
233547f7623dSRémi Lacroix     }
233647f7623dSRémi Lacroix   } else if (type == NORM_1) {
233747f7623dSRémi Lacroix     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
233847f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
233947f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
234047f7623dSRémi Lacroix           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
234147f7623dSRémi Lacroix           a_val++;
234247f7623dSRémi Lacroix         }
234347f7623dSRémi Lacroix       }
234447f7623dSRémi Lacroix     }
234547f7623dSRémi Lacroix     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
234647f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
234747f7623dSRémi Lacroix        for (ib=0; ib<bs; ib++) {
234847f7623dSRémi Lacroix           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
234947f7623dSRémi Lacroix           b_val++;
235047f7623dSRémi Lacroix         }
235147f7623dSRémi Lacroix       }
235247f7623dSRémi Lacroix     }
235347f7623dSRémi Lacroix   } else if (type == NORM_INFINITY) {
235447f7623dSRémi Lacroix     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
235547f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
235647f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
235747f7623dSRémi Lacroix           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
235847f7623dSRémi Lacroix           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
235947f7623dSRémi Lacroix           a_val++;
236047f7623dSRémi Lacroix         }
236147f7623dSRémi Lacroix       }
236247f7623dSRémi Lacroix     }
236347f7623dSRémi Lacroix     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
236447f7623dSRémi Lacroix       for (jb=0; jb<bs; jb++) {
236547f7623dSRémi Lacroix         for (ib=0; ib<bs; ib++) {
236647f7623dSRémi Lacroix           int col = garray[b_aij->j[i]] * bs + jb;
236747f7623dSRémi Lacroix           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
236847f7623dSRémi Lacroix           b_val++;
236947f7623dSRémi Lacroix         }
237047f7623dSRémi Lacroix       }
237147f7623dSRémi Lacroix     }
237247f7623dSRémi Lacroix   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
237347f7623dSRémi Lacroix   if (type == NORM_INFINITY) {
2374b2566f29SBarry Smith     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
237547f7623dSRémi Lacroix   } else {
2376b2566f29SBarry Smith     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
237747f7623dSRémi Lacroix   }
237847f7623dSRémi Lacroix   ierr = PetscFree(work);CHKERRQ(ierr);
237947f7623dSRémi Lacroix   if (type == NORM_2) {
238047f7623dSRémi Lacroix     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
238147f7623dSRémi Lacroix   }
238247f7623dSRémi Lacroix   PetscFunctionReturn(0);
238347f7623dSRémi Lacroix }
238447f7623dSRémi Lacroix 
2385713ccfa9SJed Brown PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2386bbead8a2SBarry Smith {
2387bbead8a2SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2388bbead8a2SBarry Smith   PetscErrorCode ierr;
2389bbead8a2SBarry Smith 
2390bbead8a2SBarry Smith   PetscFunctionBegin;
2391bbead8a2SBarry Smith   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
23927b6c816cSBarry Smith   A->factorerrortype             = a->A->factorerrortype;
23937b6c816cSBarry Smith   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
23947b6c816cSBarry Smith   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2395bbead8a2SBarry Smith   PetscFunctionReturn(0);
2396bbead8a2SBarry Smith }
2397bbead8a2SBarry Smith 
23987d68702bSBarry Smith PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
23997d68702bSBarry Smith {
24007d68702bSBarry Smith   PetscErrorCode ierr;
24017d68702bSBarry Smith   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
24026f33a894SBarry Smith   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
24037d68702bSBarry Smith 
24047d68702bSBarry Smith   PetscFunctionBegin;
24056f33a894SBarry Smith   if (!Y->preallocated) {
24067d68702bSBarry Smith     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
24076f33a894SBarry Smith   } else if (!aij->nz) {
2408b83222d8SBarry Smith     PetscInt nonew = aij->nonew;
24096f33a894SBarry Smith     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2410b83222d8SBarry Smith     aij->nonew = nonew;
24117d68702bSBarry Smith   }
24127d68702bSBarry Smith   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
24137d68702bSBarry Smith   PetscFunctionReturn(0);
24147d68702bSBarry Smith }
24158c7482ecSBarry Smith 
24163b49f96aSBarry Smith PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
24173b49f96aSBarry Smith {
24183b49f96aSBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
24193b49f96aSBarry Smith   PetscErrorCode ierr;
24203b49f96aSBarry Smith 
24213b49f96aSBarry Smith   PetscFunctionBegin;
24223b49f96aSBarry Smith   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
24233b49f96aSBarry Smith   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
24243b49f96aSBarry Smith   if (d) {
24253b49f96aSBarry Smith     PetscInt rstart;
24263b49f96aSBarry Smith     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
24273b49f96aSBarry Smith     *d += rstart/A->rmap->bs;
24283b49f96aSBarry Smith 
24293b49f96aSBarry Smith   }
24303b49f96aSBarry Smith   PetscFunctionReturn(0);
24313b49f96aSBarry Smith }
24323b49f96aSBarry Smith 
2433a5b7ff6bSBarry Smith PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2434a5b7ff6bSBarry Smith {
2435a5b7ff6bSBarry Smith   PetscFunctionBegin;
2436a5b7ff6bSBarry Smith   *a = ((Mat_MPIBAIJ*)A->data)->A;
2437a5b7ff6bSBarry Smith   PetscFunctionReturn(0);
2438a5b7ff6bSBarry Smith }
2439a5b7ff6bSBarry Smith 
244079bdfe76SSatish Balay /* -------------------------------------------------------------------*/
24413964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2442cc2dc46cSBarry Smith                                        MatGetRow_MPIBAIJ,
2443cc2dc46cSBarry Smith                                        MatRestoreRow_MPIBAIJ,
2444cc2dc46cSBarry Smith                                        MatMult_MPIBAIJ,
244597304618SKris Buschelman                                 /* 4*/ MatMultAdd_MPIBAIJ,
24467c922b88SBarry Smith                                        MatMultTranspose_MPIBAIJ,
24477c922b88SBarry Smith                                        MatMultTransposeAdd_MPIBAIJ,
2448cc2dc46cSBarry Smith                                        0,
2449cc2dc46cSBarry Smith                                        0,
2450cc2dc46cSBarry Smith                                        0,
245197304618SKris Buschelman                                 /*10*/ 0,
2452cc2dc46cSBarry Smith                                        0,
2453cc2dc46cSBarry Smith                                        0,
2454b1a666ecSBarry Smith                                        MatSOR_MPIBAIJ,
2455cc2dc46cSBarry Smith                                        MatTranspose_MPIBAIJ,
245697304618SKris Buschelman                                 /*15*/ MatGetInfo_MPIBAIJ,
24577fc3c18eSBarry Smith                                        MatEqual_MPIBAIJ,
2458cc2dc46cSBarry Smith                                        MatGetDiagonal_MPIBAIJ,
2459cc2dc46cSBarry Smith                                        MatDiagonalScale_MPIBAIJ,
2460cc2dc46cSBarry Smith                                        MatNorm_MPIBAIJ,
246197304618SKris Buschelman                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2462cc2dc46cSBarry Smith                                        MatAssemblyEnd_MPIBAIJ,
2463cc2dc46cSBarry Smith                                        MatSetOption_MPIBAIJ,
2464cc2dc46cSBarry Smith                                        MatZeroEntries_MPIBAIJ,
2465d519adbfSMatthew Knepley                                 /*24*/ MatZeroRows_MPIBAIJ,
2466cc2dc46cSBarry Smith                                        0,
2467cc2dc46cSBarry Smith                                        0,
2468cc2dc46cSBarry Smith                                        0,
2469cc2dc46cSBarry Smith                                        0,
24704994cf47SJed Brown                                 /*29*/ MatSetUp_MPIBAIJ,
2471273d9f13SBarry Smith                                        0,
2472cc2dc46cSBarry Smith                                        0,
2473a5b7ff6bSBarry Smith                                        MatGetDiagonalBlock_MPIBAIJ,
2474cc2dc46cSBarry Smith                                        0,
2475d519adbfSMatthew Knepley                                 /*34*/ MatDuplicate_MPIBAIJ,
2476cc2dc46cSBarry Smith                                        0,
2477cc2dc46cSBarry Smith                                        0,
2478cc2dc46cSBarry Smith                                        0,
2479cc2dc46cSBarry Smith                                        0,
2480d519adbfSMatthew Knepley                                 /*39*/ MatAXPY_MPIBAIJ,
24817dae84e0SHong Zhang                                        MatCreateSubMatrices_MPIBAIJ,
2482cc2dc46cSBarry Smith                                        MatIncreaseOverlap_MPIBAIJ,
2483cc2dc46cSBarry Smith                                        MatGetValues_MPIBAIJ,
24843c896bc6SHong Zhang                                        MatCopy_MPIBAIJ,
2485d519adbfSMatthew Knepley                                 /*44*/ 0,
2486cc2dc46cSBarry Smith                                        MatScale_MPIBAIJ,
24877d68702bSBarry Smith                                        MatShift_MPIBAIJ,
2488cc2dc46cSBarry Smith                                        0,
24896f0a72daSMatthew G. Knepley                                        MatZeroRowsColumns_MPIBAIJ,
2490f73d5cc4SBarry Smith                                 /*49*/ 0,
2491cc2dc46cSBarry Smith                                        0,
2492cc2dc46cSBarry Smith                                        0,
2493cc2dc46cSBarry Smith                                        0,
2494cc2dc46cSBarry Smith                                        0,
249593dfae19SHong Zhang                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2496cc2dc46cSBarry Smith                                        0,
2497cc2dc46cSBarry Smith                                        MatSetUnfactored_MPIBAIJ,
249882094794SBarry Smith                                        MatPermute_MPIBAIJ,
2499cc2dc46cSBarry Smith                                        MatSetValuesBlocked_MPIBAIJ,
25007dae84e0SHong Zhang                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2501f14a1c24SBarry Smith                                        MatDestroy_MPIBAIJ,
2502f14a1c24SBarry Smith                                        MatView_MPIBAIJ,
2503357abbc8SBarry Smith                                        0,
25047843d17aSBarry Smith                                        0,
2505d519adbfSMatthew Knepley                                 /*64*/ 0,
25067843d17aSBarry Smith                                        0,
25077843d17aSBarry Smith                                        0,
25087843d17aSBarry Smith                                        0,
25097843d17aSBarry Smith                                        0,
2510d519adbfSMatthew Knepley                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
25117843d17aSBarry Smith                                        0,
251297304618SKris Buschelman                                        0,
251397304618SKris Buschelman                                        0,
251497304618SKris Buschelman                                        0,
2515d519adbfSMatthew Knepley                                 /*74*/ 0,
2516f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
251797304618SKris Buschelman                                        0,
251897304618SKris Buschelman                                        0,
251997304618SKris Buschelman                                        0,
2520d519adbfSMatthew Knepley                                 /*79*/ 0,
252197304618SKris Buschelman                                        0,
252297304618SKris Buschelman                                        0,
252397304618SKris Buschelman                                        0,
25245bba2384SShri Abhyankar                                        MatLoad_MPIBAIJ,
2525d519adbfSMatthew Knepley                                 /*84*/ 0,
2526865e5f61SKris Buschelman                                        0,
2527865e5f61SKris Buschelman                                        0,
2528865e5f61SKris Buschelman                                        0,
2529865e5f61SKris Buschelman                                        0,
2530d519adbfSMatthew Knepley                                 /*89*/ 0,
2531865e5f61SKris Buschelman                                        0,
2532865e5f61SKris Buschelman                                        0,
2533865e5f61SKris Buschelman                                        0,
2534865e5f61SKris Buschelman                                        0,
2535d519adbfSMatthew Knepley                                 /*94*/ 0,
2536865e5f61SKris Buschelman                                        0,
2537865e5f61SKris Buschelman                                        0,
253899cafbc1SBarry Smith                                        0,
253999cafbc1SBarry Smith                                        0,
2540d519adbfSMatthew Knepley                                 /*99*/ 0,
254199cafbc1SBarry Smith                                        0,
254299cafbc1SBarry Smith                                        0,
254399cafbc1SBarry Smith                                        0,
254499cafbc1SBarry Smith                                        0,
2545d519adbfSMatthew Knepley                                 /*104*/0,
254699cafbc1SBarry Smith                                        MatRealPart_MPIBAIJ,
25478c7482ecSBarry Smith                                        MatImaginaryPart_MPIBAIJ,
25488c7482ecSBarry Smith                                        0,
25498c7482ecSBarry Smith                                        0,
2550d519adbfSMatthew Knepley                                 /*109*/0,
25518c7482ecSBarry Smith                                        0,
25528c7482ecSBarry Smith                                        0,
25538c7482ecSBarry Smith                                        0,
25543b49f96aSBarry Smith                                        MatMissingDiagonal_MPIBAIJ,
2555d1adec66SJed Brown                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
25568c7482ecSBarry Smith                                        0,
25574683f7a4SShri Abhyankar                                        MatGetGhosts_MPIBAIJ,
25584683f7a4SShri Abhyankar                                        0,
25594683f7a4SShri Abhyankar                                        0,
25604683f7a4SShri Abhyankar                                 /*119*/0,
25614683f7a4SShri Abhyankar                                        0,
25624683f7a4SShri Abhyankar                                        0,
2563bbead8a2SBarry Smith                                        0,
2564e8271787SHong Zhang                                        MatGetMultiProcBlock_MPIBAIJ,
2565bbead8a2SBarry Smith                                 /*124*/0,
256647f7623dSRémi Lacroix                                        MatGetColumnNorms_MPIBAIJ,
25673964eb88SJed Brown                                        MatInvertBlockDiagonal_MPIBAIJ,
25683964eb88SJed Brown                                        0,
25693964eb88SJed Brown                                        0,
25703964eb88SJed Brown                                /*129*/ 0,
25713964eb88SJed Brown                                        0,
25723964eb88SJed Brown                                        0,
25733964eb88SJed Brown                                        0,
25743964eb88SJed Brown                                        0,
25753964eb88SJed Brown                                /*134*/ 0,
25763964eb88SJed Brown                                        0,
25773964eb88SJed Brown                                        0,
25783964eb88SJed Brown                                        0,
25793964eb88SJed Brown                                        0,
258046533700Sstefano_zampini                                /*139*/ MatSetBlockSizes_Default,
2581f9426fe0SMark Adams                                        0,
25821919a2e2SJed Brown                                        0,
2583bdf6f3fcSHong Zhang                                        MatFDColoringSetUp_MPIXAIJ,
2584bdf6f3fcSHong Zhang                                        0,
2585bdf6f3fcSHong Zhang                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
25868c7482ecSBarry Smith };
258779bdfe76SSatish Balay 
258879bdfe76SSatish Balay 
2589cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2590c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2591d94109b8SHong Zhang 
2592cf12db73SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2593aac34f13SBarry Smith {
2594b8d659d7SLisandro Dalcin   PetscInt       m,rstart,cstart,cend;
259537cd3c0dSBarry Smith   PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2596b8d659d7SLisandro Dalcin   const PetscInt *JJ    =0;
2597b8d659d7SLisandro Dalcin   PetscScalar    *values=0;
2598d47bf9aaSJed Brown   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2599aac34f13SBarry Smith   PetscErrorCode ierr;
26003bd0feecSPierre Jolivet   PetscBool      nooffprocentries;
2601aac34f13SBarry Smith 
2602aac34f13SBarry Smith   PetscFunctionBegin;
260326283091SBarry Smith   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
260426283091SBarry Smith   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
260526283091SBarry Smith   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
260626283091SBarry Smith   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2607e02043d6SBarry Smith   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2608d0f46423SBarry Smith   m      = B->rmap->n/bs;
2609d0f46423SBarry Smith   rstart = B->rmap->rstart/bs;
2610d0f46423SBarry Smith   cstart = B->cmap->rstart/bs;
2611d0f46423SBarry Smith   cend   = B->cmap->rend/bs;
2612b8d659d7SLisandro Dalcin 
2613e32f2f54SBarry Smith   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2614dcca6d9dSJed Brown   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2615aac34f13SBarry Smith   for (i=0; i<m; i++) {
2616cf12db73SBarry Smith     nz = ii[i+1] - ii[i];
2617e32f2f54SBarry Smith     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2618b8d659d7SLisandro Dalcin     nz_max = PetscMax(nz_max,nz);
261937cd3c0dSBarry Smith     dlen   = 0;
262037cd3c0dSBarry Smith     olen   = 0;
2621cf12db73SBarry Smith     JJ     = jj + ii[i];
2622b8d659d7SLisandro Dalcin     for (j=0; j<nz; j++) {
262337cd3c0dSBarry Smith       if (*JJ < cstart || *JJ >= cend) olen++;
262437cd3c0dSBarry Smith       else dlen++;
2625aac34f13SBarry Smith       JJ++;
2626aac34f13SBarry Smith     }
262737cd3c0dSBarry Smith     d_nnz[i] = dlen;
262837cd3c0dSBarry Smith     o_nnz[i] = olen;
2629aac34f13SBarry Smith   }
2630aac34f13SBarry Smith   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2631fca92195SBarry Smith   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2632aac34f13SBarry Smith 
2633b8d659d7SLisandro Dalcin   values = (PetscScalar*)V;
2634b8d659d7SLisandro Dalcin   if (!values) {
263537cd3c0dSBarry Smith     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2636b8d659d7SLisandro Dalcin   }
2637b8d659d7SLisandro Dalcin   for (i=0; i<m; i++) {
2638b8d659d7SLisandro Dalcin     PetscInt          row    = i + rstart;
2639cf12db73SBarry Smith     PetscInt          ncols  = ii[i+1] - ii[i];
2640cf12db73SBarry Smith     const PetscInt    *icols = jj + ii[i];
2641bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2642cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2643b8d659d7SLisandro Dalcin       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
26443adadaf3SJed Brown     } else {                    /* block ordering does not match so we can only insert one block at a time. */
26453adadaf3SJed Brown       PetscInt j;
26463adadaf3SJed Brown       for (j=0; j<ncols; j++) {
26473adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
26483adadaf3SJed Brown         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
26493adadaf3SJed Brown       }
26503adadaf3SJed Brown     }
2651aac34f13SBarry Smith   }
2652aac34f13SBarry Smith 
2653b8d659d7SLisandro Dalcin   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
26543bd0feecSPierre Jolivet   nooffprocentries    = B->nooffprocentries;
26553bd0feecSPierre Jolivet   B->nooffprocentries = PETSC_TRUE;
2656aac34f13SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2657aac34f13SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
26583bd0feecSPierre Jolivet   B->nooffprocentries = nooffprocentries;
26593bd0feecSPierre Jolivet 
26607827cd58SJed Brown   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2661aac34f13SBarry Smith   PetscFunctionReturn(0);
2662aac34f13SBarry Smith }
2663aac34f13SBarry Smith 
2664aac34f13SBarry Smith /*@C
2665664954b6SBarry Smith    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2666aac34f13SBarry Smith 
2667d083f849SBarry Smith    Collective
2668aac34f13SBarry Smith 
2669aac34f13SBarry Smith    Input Parameters:
26701c4f3114SJed Brown +  B - the matrix
2671dfb205c3SBarry Smith .  bs - the block size
2672aac34f13SBarry Smith .  i - the indices into j for the start of each local row (starts with zero)
2673aac34f13SBarry Smith .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2674aac34f13SBarry Smith -  v - optional values in the matrix
2675aac34f13SBarry Smith 
2676664954b6SBarry Smith    Level: advanced
2677aac34f13SBarry Smith 
267895452b02SPatrick Sanan    Notes:
267995452b02SPatrick Sanan     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
26803adadaf3SJed Brown    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
26813adadaf3SJed Brown    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
26823adadaf3SJed Brown    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
26833adadaf3SJed Brown    block column and the second index is over columns within a block.
26843adadaf3SJed Brown 
2685664954b6SBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2686664954b6SBarry Smith 
26873adadaf3SJed Brown .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2688aac34f13SBarry Smith @*/
26897087cfbeSBarry Smith PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2690aac34f13SBarry Smith {
26914ac538c5SBarry Smith   PetscErrorCode ierr;
2692aac34f13SBarry Smith 
2693aac34f13SBarry Smith   PetscFunctionBegin;
26946ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
26956ba663aaSJed Brown   PetscValidType(B,1);
26966ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
26974ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2698aac34f13SBarry Smith   PetscFunctionReturn(0);
2699aac34f13SBarry Smith }
2700aac34f13SBarry Smith 
2701b2573a8aSBarry Smith PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2702a23d5eceSKris Buschelman {
2703a23d5eceSKris Buschelman   Mat_MPIBAIJ    *b;
2704dfbe8321SBarry Smith   PetscErrorCode ierr;
2705535b19f3SBarry Smith   PetscInt       i;
27065d2a9ed1SStefano Zampini   PetscMPIInt    size;
2707a23d5eceSKris Buschelman 
2708a23d5eceSKris Buschelman   PetscFunctionBegin;
270933d57670SJed Brown   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
271026283091SBarry Smith   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
271126283091SBarry Smith   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2712e02043d6SBarry Smith   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2713899cda47SBarry Smith 
2714a23d5eceSKris Buschelman   if (d_nnz) {
2715d0f46423SBarry Smith     for (i=0; i<B->rmap->n/bs; i++) {
2716e32f2f54SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2717a23d5eceSKris Buschelman     }
2718a23d5eceSKris Buschelman   }
2719a23d5eceSKris Buschelman   if (o_nnz) {
2720d0f46423SBarry Smith     for (i=0; i<B->rmap->n/bs; i++) {
2721e32f2f54SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2722a23d5eceSKris Buschelman     }
2723a23d5eceSKris Buschelman   }
2724a23d5eceSKris Buschelman 
2725a23d5eceSKris Buschelman   b      = (Mat_MPIBAIJ*)B->data;
2726a23d5eceSKris Buschelman   b->bs2 = bs*bs;
2727d0f46423SBarry Smith   b->mbs = B->rmap->n/bs;
2728d0f46423SBarry Smith   b->nbs = B->cmap->n/bs;
2729d0f46423SBarry Smith   b->Mbs = B->rmap->N/bs;
2730d0f46423SBarry Smith   b->Nbs = B->cmap->N/bs;
2731a23d5eceSKris Buschelman 
2732a23d5eceSKris Buschelman   for (i=0; i<=b->size; i++) {
2733d0f46423SBarry Smith     b->rangebs[i] = B->rmap->range[i]/bs;
2734a23d5eceSKris Buschelman   }
2735d0f46423SBarry Smith   b->rstartbs = B->rmap->rstart/bs;
2736d0f46423SBarry Smith   b->rendbs   = B->rmap->rend/bs;
2737d0f46423SBarry Smith   b->cstartbs = B->cmap->rstart/bs;
2738d0f46423SBarry Smith   b->cendbs   = B->cmap->rend/bs;
2739a23d5eceSKris Buschelman 
2740cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE)
2741cb7b82ddSBarry Smith   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2742cb7b82ddSBarry Smith #else
2743cb7b82ddSBarry Smith   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2744cb7b82ddSBarry Smith #endif
2745cb7b82ddSBarry Smith   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2746cb7b82ddSBarry Smith   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2747cb7b82ddSBarry Smith   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2748cb7b82ddSBarry Smith 
2749cb7b82ddSBarry Smith   /* Because the B will have been resized we simply destroy it and create a new one each time */
27505d2a9ed1SStefano Zampini   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2751cb7b82ddSBarry Smith   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2752cb7b82ddSBarry Smith   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
27535d2a9ed1SStefano Zampini   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2754cb7b82ddSBarry Smith   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2755cb7b82ddSBarry Smith   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2756cb7b82ddSBarry Smith 
2757526dfc15SBarry Smith   if (!B->preallocated) {
2758f69a0ea3SMatthew Knepley     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2759d0f46423SBarry Smith     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
27609c097c71SKris Buschelman     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
27613bb1ff40SBarry Smith     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2762ce94432eSBarry Smith     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2763526dfc15SBarry Smith   }
2764a23d5eceSKris Buschelman 
2765526dfc15SBarry Smith   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2766526dfc15SBarry Smith   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2767526dfc15SBarry Smith   B->preallocated  = PETSC_TRUE;
2768cb7b82ddSBarry Smith   B->was_assembled = PETSC_FALSE;
2769cb7b82ddSBarry Smith   B->assembled     = PETSC_FALSE;
2770a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2771a23d5eceSKris Buschelman }
2772a23d5eceSKris Buschelman 
27737087cfbeSBarry Smith extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
27747087cfbeSBarry Smith extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
27755bf65638SKris Buschelman 
2776cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
277782094794SBarry Smith {
277882094794SBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
277982094794SBarry Smith   PetscErrorCode ierr;
278082094794SBarry Smith   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
278182094794SBarry Smith   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
278282094794SBarry Smith   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
278382094794SBarry Smith 
278482094794SBarry Smith   PetscFunctionBegin;
2785854ce69bSBarry Smith   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
278682094794SBarry Smith   ii[0] = 0;
278782094794SBarry Smith   for (i=0; i<M; i++) {
2788e32f2f54SBarry Smith     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2789e32f2f54SBarry Smith     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
279082094794SBarry Smith     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
27915ee9ba1cSJed Brown     /* remove one from count of matrix has diagonal */
27925ee9ba1cSJed Brown     for (j=id[i]; j<id[i+1]; j++) {
27935ee9ba1cSJed Brown       if (jd[j] == i) {ii[i+1]--;break;}
27945ee9ba1cSJed Brown     }
279582094794SBarry Smith   }
2796785e854fSJed Brown   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
279782094794SBarry Smith   cnt  = 0;
279882094794SBarry Smith   for (i=0; i<M; i++) {
279982094794SBarry Smith     for (j=io[i]; j<io[i+1]; j++) {
280082094794SBarry Smith       if (garray[jo[j]] > rstart) break;
280182094794SBarry Smith       jj[cnt++] = garray[jo[j]];
280282094794SBarry Smith     }
280382094794SBarry Smith     for (k=id[i]; k<id[i+1]; k++) {
28045ee9ba1cSJed Brown       if (jd[k] != i) {
280582094794SBarry Smith         jj[cnt++] = rstart + jd[k];
280682094794SBarry Smith       }
28075ee9ba1cSJed Brown     }
280882094794SBarry Smith     for (; j<io[i+1]; j++) {
280982094794SBarry Smith       jj[cnt++] = garray[jo[j]];
281082094794SBarry Smith     }
281182094794SBarry Smith   }
2812ce94432eSBarry Smith   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
281382094794SBarry Smith   PetscFunctionReturn(0);
281482094794SBarry Smith }
281582094794SBarry Smith 
2816c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
281762471d69SBarry Smith 
2818cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2819b2573a8aSBarry Smith 
2820cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
282162471d69SBarry Smith {
282262471d69SBarry Smith   PetscErrorCode ierr;
282362471d69SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
282462471d69SBarry Smith   Mat            B;
282585a69837SSatish Balay   Mat_MPIAIJ     *b;
282662471d69SBarry Smith 
282762471d69SBarry Smith   PetscFunctionBegin;
2828ce94432eSBarry Smith   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
282962471d69SBarry Smith 
28300f6d62edSLisandro Dalcin   if (reuse == MAT_REUSE_MATRIX) {
28310f6d62edSLisandro Dalcin     B = *newmat;
28320f6d62edSLisandro Dalcin   } else {
2833ce94432eSBarry Smith     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
28346d0a4a0eSHong Zhang     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2835f090d951SRémi Lacroix     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2836f090d951SRémi Lacroix     ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
28370298fd71SBarry Smith     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
28380298fd71SBarry Smith     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
28390f6d62edSLisandro Dalcin   }
284062471d69SBarry Smith   b = (Mat_MPIAIJ*) B->data;
284162471d69SBarry Smith 
28420f6d62edSLisandro Dalcin   if (reuse == MAT_REUSE_MATRIX) {
28430f6d62edSLisandro Dalcin     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
28440f6d62edSLisandro Dalcin     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
28450f6d62edSLisandro Dalcin   } else {
28466bf464f9SBarry Smith     ierr = MatDestroy(&b->A);CHKERRQ(ierr);
28476bf464f9SBarry Smith     ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2848ab9863d7SBarry Smith     ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
284962471d69SBarry Smith     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
285062471d69SBarry Smith     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
28516a719282SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
28526a719282SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
28530f6d62edSLisandro Dalcin   }
28540f6d62edSLisandro Dalcin   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
28550f6d62edSLisandro Dalcin   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
28560f6d62edSLisandro Dalcin 
2857511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
285828be2f97SBarry Smith     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
285962471d69SBarry Smith   } else {
286062471d69SBarry Smith    *newmat = B;
286162471d69SBarry Smith   }
286262471d69SBarry Smith   PetscFunctionReturn(0);
286362471d69SBarry Smith }
286462471d69SBarry Smith 
28650bad9183SKris Buschelman /*MC
2866fafad747SKris Buschelman    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
28670bad9183SKris Buschelman 
28680bad9183SKris Buschelman    Options Database Keys:
28698c07d4e3SBarry Smith + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
28708c07d4e3SBarry Smith . -mat_block_size <bs> - set the blocksize used to store the matrix
28718c07d4e3SBarry Smith - -mat_use_hash_table <fact>
28720bad9183SKris Buschelman 
28730bad9183SKris Buschelman    Level: beginner
28740cd7f59aSBarry Smith 
28750cd7f59aSBarry Smith    Notes:
    MatSetOption(Mat,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
28780bad9183SKris Buschelman 
2879fd292e60Sprj- .seealso: MatCreateBAIJ
28800bad9183SKris Buschelman M*/
28810bad9183SKris Buschelman 
2882cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2883c0cdd4a1SDahai Guo 
28848cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2885273d9f13SBarry Smith {
2886273d9f13SBarry Smith   Mat_MPIBAIJ    *b;
2887dfbe8321SBarry Smith   PetscErrorCode ierr;
288894ae4db5SBarry Smith   PetscBool      flg = PETSC_FALSE;
2889273d9f13SBarry Smith 
2890273d9f13SBarry Smith   PetscFunctionBegin;
2891b00a9115SJed Brown   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
289282502324SSatish Balay   B->data = (void*)b;
289382502324SSatish Balay 
2894273d9f13SBarry Smith   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2895273d9f13SBarry Smith   B->assembled = PETSC_FALSE;
2896273d9f13SBarry Smith 
2897273d9f13SBarry Smith   B->insertmode = NOT_SET_VALUES;
2898ce94432eSBarry Smith   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2899ce94432eSBarry Smith   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2900273d9f13SBarry Smith 
2901273d9f13SBarry Smith   /* build local table of row and column ownerships */
2902854ce69bSBarry Smith   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
2903273d9f13SBarry Smith 
2904273d9f13SBarry Smith   /* build cache for off array entries formed */
2905ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
290626fbe8dcSKarl Rupp 
2907273d9f13SBarry Smith   b->donotstash  = PETSC_FALSE;
29080298fd71SBarry Smith   b->colmap      = NULL;
29090298fd71SBarry Smith   b->garray      = NULL;
2910273d9f13SBarry Smith   b->roworiented = PETSC_TRUE;
2911273d9f13SBarry Smith 
2912273d9f13SBarry Smith   /* stuff used in block assembly */
2913273d9f13SBarry Smith   b->barray = 0;
2914273d9f13SBarry Smith 
2915273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
2916273d9f13SBarry Smith   b->lvec  = 0;
2917273d9f13SBarry Smith   b->Mvctx = 0;
2918273d9f13SBarry Smith 
2919273d9f13SBarry Smith   /* stuff for MatGetRow() */
2920273d9f13SBarry Smith   b->rowindices   = 0;
2921273d9f13SBarry Smith   b->rowvalues    = 0;
2922273d9f13SBarry Smith   b->getrowactive = PETSC_FALSE;
2923273d9f13SBarry Smith 
2924273d9f13SBarry Smith   /* hash table stuff */
2925273d9f13SBarry Smith   b->ht           = 0;
2926273d9f13SBarry Smith   b->hd           = 0;
2927273d9f13SBarry Smith   b->ht_size      = 0;
2928273d9f13SBarry Smith   b->ht_flag      = PETSC_FALSE;
2929273d9f13SBarry Smith   b->ht_fact      = 0;
2930273d9f13SBarry Smith   b->ht_total_ct  = 0;
2931273d9f13SBarry Smith   b->ht_insert_ct = 0;
2932273d9f13SBarry Smith 
29337dae84e0SHong Zhang   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
29347a868f3eSHong Zhang   b->ijonly = PETSC_FALSE;
29357a868f3eSHong Zhang 
29368c07d4e3SBarry Smith 
2937bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
2938bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
2939bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
29407ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
29417ea3e4caSstefano_zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
29427ea3e4caSstefano_zampini #endif
2943bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2944bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2945bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2946bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2947bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2948bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2949c9225affSStefano Zampini   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
295017667f90SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
295194ae4db5SBarry Smith 
295294ae4db5SBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2953abf3b562SBarry Smith   ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
295494ae4db5SBarry Smith   if (flg) {
295594ae4db5SBarry Smith     PetscReal fact = 1.39;
295694ae4db5SBarry Smith     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
295794ae4db5SBarry Smith     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
295894ae4db5SBarry Smith     if (fact <= 1.0) fact = 1.39;
295994ae4db5SBarry Smith     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
296094ae4db5SBarry Smith     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
296194ae4db5SBarry Smith   }
296294ae4db5SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2963273d9f13SBarry Smith   PetscFunctionReturn(0);
2964273d9f13SBarry Smith }
2965273d9f13SBarry Smith 
2966209238afSKris Buschelman /*MC
2967002d173eSKris Buschelman    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2968209238afSKris Buschelman 
2969209238afSKris Buschelman    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2970209238afSKris Buschelman    and MATMPIBAIJ otherwise.
2971209238afSKris Buschelman 
2972209238afSKris Buschelman    Options Database Keys:
2973209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2974209238afSKris Buschelman 
2975209238afSKris Buschelman   Level: beginner
2976209238afSKris Buschelman 
297769b1f4b7SBarry Smith .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2978209238afSKris Buschelman M*/
2979209238afSKris Buschelman 
2980273d9f13SBarry Smith /*@C
2981aac34f13SBarry Smith    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2982273d9f13SBarry Smith    (block compressed row).  For good matrix assembly performance
2983273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
2984273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2985273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
2986273d9f13SBarry Smith 
2987273d9f13SBarry Smith    Collective on Mat
2988273d9f13SBarry Smith 
2989273d9f13SBarry Smith    Input Parameters:
29901c4f3114SJed Brown +  B - the matrix
2991bb7ae925SBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2992bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2993273d9f13SBarry Smith .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2994273d9f13SBarry Smith            submatrix  (same for all local rows)
2995273d9f13SBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
29970298fd71SBarry Smith            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
299895742e49SBarry Smith            set it even if it is zero.
2999273d9f13SBarry Smith .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3000273d9f13SBarry Smith            submatrix (same for all local rows).
3001273d9f13SBarry Smith -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3002273d9f13SBarry Smith            off-diagonal portion of the local submatrix (possibly different for
30030298fd71SBarry Smith            each block row) or NULL.
3004273d9f13SBarry Smith 
300549a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
3006273d9f13SBarry Smith 
3007273d9f13SBarry Smith    Options Database Keys:
30088c07d4e3SBarry Smith +   -mat_block_size - size of the blocks to use
30098c07d4e3SBarry Smith -   -mat_use_hash_table <fact>
3010273d9f13SBarry Smith 
3011273d9f13SBarry Smith    Notes:
3012273d9f13SBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3013273d9f13SBarry Smith    then it must be used on all processors that share the object for that argument.
3014273d9f13SBarry Smith 
3015273d9f13SBarry Smith    Storage Information:
3016273d9f13SBarry Smith    For a square global matrix we define each processor's diagonal portion
3017273d9f13SBarry Smith    to be its local rows and the corresponding columns (a square submatrix);
3018273d9f13SBarry Smith    each processor's off-diagonal portion encompasses the remainder of the
3019273d9f13SBarry Smith    local matrix (a rectangular submatrix).
3020273d9f13SBarry Smith 
3021273d9f13SBarry Smith    The user can specify preallocated storage for the diagonal part of
3022273d9f13SBarry Smith    the local submatrix with either d_nz or d_nnz (not both).  Set
30230298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3024273d9f13SBarry Smith    memory allocation.  Likewise, specify preallocated storage for the
3025273d9f13SBarry Smith    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3026273d9f13SBarry Smith 
3027273d9f13SBarry Smith    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3028273d9f13SBarry Smith    the figure below we depict these three local rows and all columns (0-11).
3029273d9f13SBarry Smith 
3030273d9f13SBarry Smith .vb
3031273d9f13SBarry Smith            0 1 2 3 4 5 6 7 8 9 10 11
3032a4b1a0f6SJed Brown           --------------------------
3033273d9f13SBarry Smith    row 3  |o o o d d d o o o o  o  o
3034273d9f13SBarry Smith    row 4  |o o o d d d o o o o  o  o
3035273d9f13SBarry Smith    row 5  |o o o d d d o o o o  o  o
3036a4b1a0f6SJed Brown           --------------------------
3037273d9f13SBarry Smith .ve
3038273d9f13SBarry Smith 
3039273d9f13SBarry Smith    Thus, any entries in the d locations are stored in the d (diagonal)
3040273d9f13SBarry Smith    submatrix, and any entries in the o locations are stored in the
3041273d9f13SBarry Smith    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3042273d9f13SBarry Smith    stored simply in the MATSEQBAIJ format for compressed row storage.
3043273d9f13SBarry Smith 
3044273d9f13SBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3045273d9f13SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3046273d9f13SBarry Smith    In general, for PDE problems in which most nonzeros are near the diagonal,
3047273d9f13SBarry Smith    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3048273d9f13SBarry Smith    or you will get TERRIBLE performance; see the users' manual chapter on
3049273d9f13SBarry Smith    matrices.
3050273d9f13SBarry Smith 
3051aa95bbe8SBarry Smith    You can call MatGetInfo() to get information on how effective the preallocation was;
3052aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3053aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
3054aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
3055aa95bbe8SBarry Smith 
3056273d9f13SBarry Smith    Level: intermediate
3057273d9f13SBarry Smith 
3058ab978733SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3059273d9f13SBarry Smith @*/
30607087cfbeSBarry Smith PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3061273d9f13SBarry Smith {
30624ac538c5SBarry Smith   PetscErrorCode ierr;
3063273d9f13SBarry Smith 
3064273d9f13SBarry Smith   PetscFunctionBegin;
30656ba663aaSJed Brown   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
30666ba663aaSJed Brown   PetscValidType(B,1);
30676ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B,bs,2);
30684ac538c5SBarry Smith   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3069273d9f13SBarry Smith   PetscFunctionReturn(0);
3070273d9f13SBarry Smith }
3071273d9f13SBarry Smith 
307279bdfe76SSatish Balay /*@C
307369b1f4b7SBarry Smith    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
307479bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
307579bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
307679bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
307779bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
307879bdfe76SSatish Balay 
3079d083f849SBarry Smith    Collective
3080db81eaa0SLois Curfman McInnes 
308179bdfe76SSatish Balay    Input Parameters:
3082db81eaa0SLois Curfman McInnes +  comm - MPI communicator
3083bb7ae925SBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3084bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
308579bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
308692e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
308792e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
308892e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
308992e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
309092e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
3091be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3092be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
309347a75d0bSBarry Smith .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
309479bdfe76SSatish Balay            submatrix  (same for all local rows)
309547a75d0bSBarry Smith .  d_nnz - array containing the number of nonzero blocks in the various block rows
309692e8d321SLois Curfman McInnes            of the diagonal portion of the local submatrix (possibly different for each block
30970298fd71SBarry Smith            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
309895742e49SBarry Smith            and set it even if it is zero.
309947a75d0bSBarry Smith .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
310079bdfe76SSatish Balay            submatrix (same for all local rows).
310147a75d0bSBarry Smith -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
310292e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
31030298fd71SBarry Smith            each block row) or NULL.
310479bdfe76SSatish Balay 
310579bdfe76SSatish Balay    Output Parameter:
310679bdfe76SSatish Balay .  A - the matrix
310779bdfe76SSatish Balay 
3108db81eaa0SLois Curfman McInnes    Options Database Keys:
31098c07d4e3SBarry Smith +   -mat_block_size - size of the blocks to use
31108c07d4e3SBarry Smith -   -mat_use_hash_table <fact>
31113ffaccefSLois Curfman McInnes 
3112175b88e8SBarry Smith    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3113f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3114175b88e8SBarry Smith    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3115175b88e8SBarry Smith 
3116b259b22eSLois Curfman McInnes    Notes:
311749a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
311849a6f317SBarry Smith 
311947a75d0bSBarry Smith    A nonzero block is any block that has 1 or more nonzeros in it
312047a75d0bSBarry Smith 
312179bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
312279bdfe76SSatish Balay    (possibly both).
312379bdfe76SSatish Balay 
3124be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3125be79a94dSBarry Smith    then it must be used on all processors that share the object for that argument.
3126be79a94dSBarry Smith 
312779bdfe76SSatish Balay    Storage Information:
312879bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
312979bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
313079bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
313179bdfe76SSatish Balay    local matrix (a rectangular submatrix).
313279bdfe76SSatish Balay 
313379bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
313479bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
31350298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
313679bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
313779bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
313879bdfe76SSatish Balay 
313979bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
314079bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
314179bdfe76SSatish Balay 
3142db81eaa0SLois Curfman McInnes .vb
3143db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
3144a4b1a0f6SJed Brown           --------------------------
3145db81eaa0SLois Curfman McInnes    row 3  |o o o d d d o o o o  o  o
3146db81eaa0SLois Curfman McInnes    row 4  |o o o d d d o o o o  o  o
3147db81eaa0SLois Curfman McInnes    row 5  |o o o d d d o o o o  o  o
3148a4b1a0f6SJed Brown           --------------------------
3149db81eaa0SLois Curfman McInnes .ve
315079bdfe76SSatish Balay 
315179bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
315279bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
315379bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
315457b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
315579bdfe76SSatish Balay 
3156d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3157d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
315879bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
315992e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
316092e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
31616da5968aSLois Curfman McInnes    matrices.
316279bdfe76SSatish Balay 
3163027ccd11SLois Curfman McInnes    Level: intermediate
3164027ccd11SLois Curfman McInnes 
316569b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
316679bdfe76SSatish Balay @*/
316769b1f4b7SBarry Smith PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
316879bdfe76SSatish Balay {
31696849ba73SBarry Smith   PetscErrorCode ierr;
3170b24ad042SBarry Smith   PetscMPIInt    size;
317179bdfe76SSatish Balay 
3172d64ed03dSBarry Smith   PetscFunctionBegin;
3173f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3174f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3175d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3176273d9f13SBarry Smith   if (size > 1) {
3177273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3178273d9f13SBarry Smith     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3179273d9f13SBarry Smith   } else {
3180273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3181273d9f13SBarry Smith     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
31823914022bSBarry Smith   }
31833a40ed3dSBarry Smith   PetscFunctionReturn(0);
318479bdfe76SSatish Balay }
3185026e39d0SSatish Balay 
31866849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
31870ac07820SSatish Balay {
31880ac07820SSatish Balay   Mat            mat;
31890ac07820SSatish Balay   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3190dfbe8321SBarry Smith   PetscErrorCode ierr;
3191b24ad042SBarry Smith   PetscInt       len=0;
31920ac07820SSatish Balay 
3193d64ed03dSBarry Smith   PetscFunctionBegin;
31940ac07820SSatish Balay   *newmat = 0;
3195ce94432eSBarry Smith   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3196d0f46423SBarry Smith   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
31977adad957SLisandro Dalcin   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
31987fff6886SHong Zhang 
3199d5f3da31SBarry Smith   mat->factortype   = matin->factortype;
3200273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
32010ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
32027fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
32037fff6886SHong Zhang 
3204273d9f13SBarry Smith   a             = (Mat_MPIBAIJ*)mat->data;
3205d0f46423SBarry Smith   mat->rmap->bs = matin->rmap->bs;
32060ac07820SSatish Balay   a->bs2        = oldmat->bs2;
32070ac07820SSatish Balay   a->mbs        = oldmat->mbs;
32080ac07820SSatish Balay   a->nbs        = oldmat->nbs;
32090ac07820SSatish Balay   a->Mbs        = oldmat->Mbs;
32100ac07820SSatish Balay   a->Nbs        = oldmat->Nbs;
32110ac07820SSatish Balay 
32121e1e43feSBarry Smith   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
32131e1e43feSBarry Smith   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3214899cda47SBarry Smith 
32150ac07820SSatish Balay   a->size         = oldmat->size;
32160ac07820SSatish Balay   a->rank         = oldmat->rank;
3217aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
3218aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
3219aef5e8e0SSatish Balay   a->rowindices   = 0;
32200ac07820SSatish Balay   a->rowvalues    = 0;
32210ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
322230793edcSSatish Balay   a->barray       = 0;
3223899cda47SBarry Smith   a->rstartbs     = oldmat->rstartbs;
3224899cda47SBarry Smith   a->rendbs       = oldmat->rendbs;
3225899cda47SBarry Smith   a->cstartbs     = oldmat->cstartbs;
3226899cda47SBarry Smith   a->cendbs       = oldmat->cendbs;
32270ac07820SSatish Balay 
3228133cdb44SSatish Balay   /* hash table stuff */
3229133cdb44SSatish Balay   a->ht           = 0;
3230133cdb44SSatish Balay   a->hd           = 0;
3231133cdb44SSatish Balay   a->ht_size      = 0;
3232133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
323325fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
3234133cdb44SSatish Balay   a->ht_total_ct  = 0;
3235133cdb44SSatish Balay   a->ht_insert_ct = 0;
3236133cdb44SSatish Balay 
3237580bdb30SBarry Smith   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
32380ac07820SSatish Balay   if (oldmat->colmap) {
3239aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
32400f5bd95cSBarry Smith     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
324148e59246SSatish Balay #else
3242854ce69bSBarry Smith     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
32433bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3244580bdb30SBarry Smith     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
324548e59246SSatish Balay #endif
32460ac07820SSatish Balay   } else a->colmap = 0;
32474beb1cfeSHong Zhang 
32480ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3249785e854fSJed Brown     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
32503bb1ff40SBarry Smith     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3251580bdb30SBarry Smith     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
32520ac07820SSatish Balay   } else a->garray = 0;
32530ac07820SSatish Balay 
3254ce94432eSBarry Smith   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
32550ac07820SSatish Balay   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
32563bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
32570ac07820SSatish Balay   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
32583bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
32597fff6886SHong Zhang 
32602e8a6d31SBarry Smith   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
32613bb1ff40SBarry Smith   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
32622e8a6d31SBarry Smith   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
32633bb1ff40SBarry Smith   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3264140e18c1SBarry Smith   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
32650ac07820SSatish Balay   *newmat = mat;
32663a40ed3dSBarry Smith   PetscFunctionReturn(0);
32670ac07820SSatish Balay }
326857b952d6SSatish Balay 
3269618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
3270b51a4376SLisandro Dalcin PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
3271b51a4376SLisandro Dalcin {
3272b51a4376SLisandro Dalcin   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3273b51a4376SLisandro Dalcin   PetscInt       *rowidxs,*colidxs,rs,cs,ce;
3274b51a4376SLisandro Dalcin   PetscScalar    *matvals;
3275b51a4376SLisandro Dalcin   PetscErrorCode ierr;
3276b51a4376SLisandro Dalcin 
3277b51a4376SLisandro Dalcin   PetscFunctionBegin;
3278b51a4376SLisandro Dalcin   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3279b51a4376SLisandro Dalcin 
3280b51a4376SLisandro Dalcin   /* read in matrix header */
3281b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3282b51a4376SLisandro Dalcin   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3283b51a4376SLisandro Dalcin   M  = header[1]; N = header[2]; nz = header[3];
3284b51a4376SLisandro Dalcin   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3285b51a4376SLisandro Dalcin   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3286b51a4376SLisandro Dalcin   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
3287b51a4376SLisandro Dalcin 
3288b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
3289b51a4376SLisandro Dalcin   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3290618cc2edSLisandro Dalcin   /* set local sizes if not set already */
3291618cc2edSLisandro Dalcin   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3292618cc2edSLisandro Dalcin   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3293b51a4376SLisandro Dalcin   /* set global sizes if not set already */
3294b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3295b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
3296b51a4376SLisandro Dalcin   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3297b51a4376SLisandro Dalcin   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3298b51a4376SLisandro Dalcin 
3299b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
3300b51a4376SLisandro Dalcin   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3301b51a4376SLisandro Dalcin   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3302b51a4376SLisandro Dalcin   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3303b51a4376SLisandro Dalcin   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3304b51a4376SLisandro Dalcin   ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);
3305b51a4376SLisandro Dalcin   ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);
3306b51a4376SLisandro Dalcin   mbs = m/bs; nbs = n/bs;
3307b51a4376SLisandro Dalcin 
3308b51a4376SLisandro Dalcin   /* read in row lengths and build row indices */
3309b51a4376SLisandro Dalcin   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3310b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3311b51a4376SLisandro Dalcin   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3312b51a4376SLisandro Dalcin   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3313b51a4376SLisandro Dalcin   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3314b51a4376SLisandro Dalcin 
3315b51a4376SLisandro Dalcin   /* read in column indices and matrix values */
3316b51a4376SLisandro Dalcin   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3317b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3318b51a4376SLisandro Dalcin   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3319b51a4376SLisandro Dalcin 
3320b51a4376SLisandro Dalcin   { /* preallocate matrix storage */
3321b51a4376SLisandro Dalcin     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3322b51a4376SLisandro Dalcin     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3323618cc2edSLisandro Dalcin     PetscBool  sbaij,done;
3324b51a4376SLisandro Dalcin     PetscInt   *d_nnz,*o_nnz;
3325b51a4376SLisandro Dalcin 
3326b51a4376SLisandro Dalcin     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3327b51a4376SLisandro Dalcin     ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
3328b51a4376SLisandro Dalcin     ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
3329618cc2edSLisandro Dalcin     ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
3330b51a4376SLisandro Dalcin     for (i=0; i<mbs; i++) {
3331b51a4376SLisandro Dalcin       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3332b51a4376SLisandro Dalcin       ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
3333618cc2edSLisandro Dalcin       for (k=0; k<bs; k++) {
3334618cc2edSLisandro Dalcin         PetscInt row = bs*i + k;
3335618cc2edSLisandro Dalcin         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3336618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3337618cc2edSLisandro Dalcin           if (!sbaij || col >= row) {
3338618cc2edSLisandro Dalcin             if (col >= cs && col < ce) {
3339618cc2edSLisandro Dalcin               if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
3340b51a4376SLisandro Dalcin             } else {
3341618cc2edSLisandro Dalcin               ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
3342b51a4376SLisandro Dalcin               if (done) o_nnz[i]++;
3343b51a4376SLisandro Dalcin             }
3344b51a4376SLisandro Dalcin           }
3345618cc2edSLisandro Dalcin         }
3346618cc2edSLisandro Dalcin       }
3347618cc2edSLisandro Dalcin     }
3348b51a4376SLisandro Dalcin     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3349b51a4376SLisandro Dalcin     ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
3350b51a4376SLisandro Dalcin     ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3351618cc2edSLisandro Dalcin     ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3352b51a4376SLisandro Dalcin     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3353b51a4376SLisandro Dalcin   }
3354b51a4376SLisandro Dalcin 
3355b51a4376SLisandro Dalcin   /* store matrix values */
3356b51a4376SLisandro Dalcin   for (i=0; i<m; i++) {
3357b51a4376SLisandro Dalcin     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
3358618cc2edSLisandro Dalcin     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3359b51a4376SLisandro Dalcin   }
3360b51a4376SLisandro Dalcin 
3361b51a4376SLisandro Dalcin   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3362b51a4376SLisandro Dalcin   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3363b51a4376SLisandro Dalcin   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3364b51a4376SLisandro Dalcin   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3365b51a4376SLisandro Dalcin   PetscFunctionReturn(0);
3366b51a4376SLisandro Dalcin }
3367b51a4376SLisandro Dalcin 
3368b51a4376SLisandro Dalcin PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
33694683f7a4SShri Abhyankar {
33704683f7a4SShri Abhyankar   PetscErrorCode ierr;
33717f489da9SVaclav Hapla   PetscBool      isbinary;
33724683f7a4SShri Abhyankar 
33734683f7a4SShri Abhyankar   PetscFunctionBegin;
33747f489da9SVaclav Hapla   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3375b51a4376SLisandro Dalcin   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3376b51a4376SLisandro Dalcin   ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
33774683f7a4SShri Abhyankar   PetscFunctionReturn(0);
33784683f7a4SShri Abhyankar }
33794683f7a4SShri Abhyankar 
3380133cdb44SSatish Balay /*@
3381133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3382133cdb44SSatish Balay 
3383133cdb44SSatish Balay    Input Parameters:
3384a2b725a8SWilliam Gropp +  mat  - the matrix
3385a2b725a8SWilliam Gropp -  fact - factor
3386133cdb44SSatish Balay 
3387c5eb9154SBarry Smith    Not Collective, each process can use a different factor
3388fee21e36SBarry Smith 
33898c890885SBarry Smith    Level: advanced
33908c890885SBarry Smith 
3391133cdb44SSatish Balay   Notes:
33928c07d4e3SBarry Smith    This can also be set by the command line option: -mat_use_hash_table <fact>
3393133cdb44SSatish Balay 
3394133cdb44SSatish Balay .seealso: MatSetOption()
3395133cdb44SSatish Balay @*/
33967087cfbeSBarry Smith PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3397133cdb44SSatish Balay {
33984ac538c5SBarry Smith   PetscErrorCode ierr;
33995bf65638SKris Buschelman 
34005bf65638SKris Buschelman   PetscFunctionBegin;
34014ac538c5SBarry Smith   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
34025bf65638SKris Buschelman   PetscFunctionReturn(0);
34035bf65638SKris Buschelman }
34045bf65638SKris Buschelman 
34057087cfbeSBarry Smith PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
34065bf65638SKris Buschelman {
340725fdafccSSatish Balay   Mat_MPIBAIJ *baij;
3408133cdb44SSatish Balay 
3409133cdb44SSatish Balay   PetscFunctionBegin;
3410133cdb44SSatish Balay   baij          = (Mat_MPIBAIJ*)mat->data;
3411133cdb44SSatish Balay   baij->ht_fact = fact;
3412133cdb44SSatish Balay   PetscFunctionReturn(0);
3413133cdb44SSatish Balay }
3414f2a5309cSSatish Balay 
34159230625dSJed Brown PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3416f2a5309cSSatish Balay {
3417f2a5309cSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3418ab4d48faSStefano Zampini   PetscBool      flg;
3419ab4d48faSStefano Zampini   PetscErrorCode ierr;
34205fd66863SKarl Rupp 
3421f2a5309cSSatish Balay   PetscFunctionBegin;
3422ab4d48faSStefano Zampini   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3423ab4d48faSStefano Zampini   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
342421e72a00SBarry Smith   if (Ad)     *Ad     = a->A;
342521e72a00SBarry Smith   if (Ao)     *Ao     = a->B;
342621e72a00SBarry Smith   if (colmap) *colmap = a->garray;
3427f2a5309cSSatish Balay   PetscFunctionReturn(0);
3428f2a5309cSSatish Balay }
342985535b8eSBarry Smith 
343085535b8eSBarry Smith /*
343185535b8eSBarry Smith     Special version for direct calls from Fortran (to eliminate two function call overheads)
343285535b8eSBarry Smith */
343385535b8eSBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
343485535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
343585535b8eSBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
343685535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
343785535b8eSBarry Smith #endif
343885535b8eSBarry Smith 
343985535b8eSBarry Smith /*@C
344085535b8eSBarry Smith   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
344185535b8eSBarry Smith 
344285535b8eSBarry Smith   Collective on Mat
344385535b8eSBarry Smith 
344485535b8eSBarry Smith   Input Parameters:
344585535b8eSBarry Smith + mat - the matrix
344685535b8eSBarry Smith . min - number of input rows
344785535b8eSBarry Smith . im - input rows
344885535b8eSBarry Smith . nin - number of input columns
344985535b8eSBarry Smith . in - input columns
345085535b8eSBarry Smith . v - numerical values input
345185535b8eSBarry Smith - addvin - INSERT_VALUES or ADD_VALUES
345285535b8eSBarry Smith 
345395452b02SPatrick Sanan   Notes:
345495452b02SPatrick Sanan     This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible for code reuse.
345585535b8eSBarry Smith 
345685535b8eSBarry Smith   Level: advanced
345785535b8eSBarry Smith 
345885535b8eSBarry Smith .seealso:   MatSetValuesBlocked()
345985535b8eSBarry Smith @*/
346085535b8eSBarry Smith PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
346185535b8eSBarry Smith {
346285535b8eSBarry Smith   /* convert input arguments to C version */
346385535b8eSBarry Smith   Mat        mat  = *matin;
346485535b8eSBarry Smith   PetscInt   m    = *min, n = *nin;
346585535b8eSBarry Smith   InsertMode addv = *addvin;
346685535b8eSBarry Smith 
346785535b8eSBarry Smith   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
346885535b8eSBarry Smith   const MatScalar *value;
346985535b8eSBarry Smith   MatScalar       *barray     = baij->barray;
3470ace3abfcSBarry Smith   PetscBool       roworiented = baij->roworiented;
347185535b8eSBarry Smith   PetscErrorCode  ierr;
347285535b8eSBarry Smith   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
347385535b8eSBarry Smith   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3474d0f46423SBarry Smith   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
347585535b8eSBarry Smith 
347685535b8eSBarry Smith   PetscFunctionBegin;
347785535b8eSBarry Smith   /* tasks normally handled by MatSetValuesBlocked() */
347826fbe8dcSKarl Rupp   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
347976bd3646SJed Brown   else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
348076bd3646SJed Brown   if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
348185535b8eSBarry Smith   if (mat->assembled) {
348285535b8eSBarry Smith     mat->was_assembled = PETSC_TRUE;
348385535b8eSBarry Smith     mat->assembled     = PETSC_FALSE;
348485535b8eSBarry Smith   }
348585535b8eSBarry Smith   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
348685535b8eSBarry Smith 
348785535b8eSBarry Smith 
348885535b8eSBarry Smith   if (!barray) {
3489785e854fSJed Brown     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
349085535b8eSBarry Smith     baij->barray = barray;
349185535b8eSBarry Smith   }
349285535b8eSBarry Smith 
349326fbe8dcSKarl Rupp   if (roworiented) stepval = (n-1)*bs;
349426fbe8dcSKarl Rupp   else stepval = (m-1)*bs;
349526fbe8dcSKarl Rupp 
349685535b8eSBarry Smith   for (i=0; i<m; i++) {
349785535b8eSBarry Smith     if (im[i] < 0) continue;
3498*cf9c20a2SJed Brown     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
349985535b8eSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
350085535b8eSBarry Smith       row = im[i] - rstart;
350185535b8eSBarry Smith       for (j=0; j<n; j++) {
350285535b8eSBarry Smith         /* If NumCol = 1 then a copy is not required */
350385535b8eSBarry Smith         if ((roworiented) && (n == 1)) {
350485535b8eSBarry Smith           barray = (MatScalar*)v + i*bs2;
350585535b8eSBarry Smith         } else if ((!roworiented) && (m == 1)) {
350685535b8eSBarry Smith           barray = (MatScalar*)v + j*bs2;
350785535b8eSBarry Smith         } else { /* Here a copy is required */
350885535b8eSBarry Smith           if (roworiented) {
350985535b8eSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
351085535b8eSBarry Smith           } else {
351185535b8eSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
351285535b8eSBarry Smith           }
351385535b8eSBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
351485535b8eSBarry Smith             for (jj=0; jj<bs; jj++) {
351585535b8eSBarry Smith               *barray++ = *value++;
351685535b8eSBarry Smith             }
351785535b8eSBarry Smith           }
351885535b8eSBarry Smith           barray -=bs2;
351985535b8eSBarry Smith         }
352085535b8eSBarry Smith 
352185535b8eSBarry Smith         if (in[j] >= cstart && in[j] < cend) {
352285535b8eSBarry Smith           col  = in[j] - cstart;
35238ab52850SBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
352426fbe8dcSKarl Rupp         } else if (in[j] < 0) continue;
3525*cf9c20a2SJed Brown         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
352685535b8eSBarry Smith         else {
352785535b8eSBarry Smith           if (mat->was_assembled) {
352885535b8eSBarry Smith             if (!baij->colmap) {
3529ab9863d7SBarry Smith               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
353085535b8eSBarry Smith             }
353185535b8eSBarry Smith 
353285535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
353385535b8eSBarry Smith #if defined(PETSC_USE_CTABLE)
353485535b8eSBarry Smith             { PetscInt data;
353585535b8eSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3536e32f2f54SBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
353785535b8eSBarry Smith             }
353885535b8eSBarry Smith #else
3539e32f2f54SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
354085535b8eSBarry Smith #endif
354185535b8eSBarry Smith #endif
354285535b8eSBarry Smith #if defined(PETSC_USE_CTABLE)
354385535b8eSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
354485535b8eSBarry Smith             col  = (col - 1)/bs;
354585535b8eSBarry Smith #else
354685535b8eSBarry Smith             col = (baij->colmap[in[j]] - 1)/bs;
354785535b8eSBarry Smith #endif
354885535b8eSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3549ab9863d7SBarry Smith               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
355085535b8eSBarry Smith               col  =  in[j];
355185535b8eSBarry Smith             }
355226fbe8dcSKarl Rupp           } else col = in[j];
35538ab52850SBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
355485535b8eSBarry Smith         }
355585535b8eSBarry Smith       }
355685535b8eSBarry Smith     } else {
355785535b8eSBarry Smith       if (!baij->donotstash) {
355885535b8eSBarry Smith         if (roworiented) {
355985535b8eSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
356085535b8eSBarry Smith         } else {
356185535b8eSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
356285535b8eSBarry Smith         }
356385535b8eSBarry Smith       }
356485535b8eSBarry Smith     }
356585535b8eSBarry Smith   }
356685535b8eSBarry Smith 
356785535b8eSBarry Smith   /* task normally handled by MatSetValuesBlocked() */
356885535b8eSBarry Smith   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
356985535b8eSBarry Smith   PetscFunctionReturn(0);
357085535b8eSBarry Smith }
3571dfb205c3SBarry Smith 
3572dfb205c3SBarry Smith /*@
3573483a2f95SBarry Smith      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3574dfb205c3SBarry Smith          CSR format the local rows.
3575dfb205c3SBarry Smith 
3576d083f849SBarry Smith    Collective
3577dfb205c3SBarry Smith 
3578dfb205c3SBarry Smith    Input Parameters:
3579dfb205c3SBarry Smith +  comm - MPI communicator
3580dfb205c3SBarry Smith .  bs - the block size, only a block size of 1 is supported
3581dfb205c3SBarry Smith .  m - number of local rows (Cannot be PETSC_DECIDE)
3582dfb205c3SBarry Smith .  n - This value should be the same as the local size used in creating the
3583dfb205c3SBarry Smith        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3584dfb205c3SBarry Smith        calculated if N is given) For square matrices n is almost always m.
3585dfb205c3SBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3586dfb205c3SBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3587483a2f95SBarry Smith .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3588dfb205c3SBarry Smith .   j - column indices
3589dfb205c3SBarry Smith -   a - matrix values
3590dfb205c3SBarry Smith 
3591dfb205c3SBarry Smith    Output Parameter:
3592dfb205c3SBarry Smith .   mat - the matrix
3593dfb205c3SBarry Smith 
3594dfb205c3SBarry Smith    Level: intermediate
3595dfb205c3SBarry Smith 
3596dfb205c3SBarry Smith    Notes:
3597dfb205c3SBarry Smith        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3598dfb205c3SBarry Smith      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3599dfb205c3SBarry Smith      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3600dfb205c3SBarry Smith 
36013adadaf3SJed Brown      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
36023adadaf3SJed Brown      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
36033adadaf3SJed Brown      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
36043adadaf3SJed Brown      with column-major ordering within blocks.
36053adadaf3SJed Brown 
3606dfb205c3SBarry Smith        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3607dfb205c3SBarry Smith 
3608dfb205c3SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
360969b1f4b7SBarry Smith           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3610dfb205c3SBarry Smith @*/
36117087cfbeSBarry Smith PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3612dfb205c3SBarry Smith {
3613dfb205c3SBarry Smith   PetscErrorCode ierr;
3614dfb205c3SBarry Smith 
3615dfb205c3SBarry Smith   PetscFunctionBegin;
3616f23aa3ddSBarry Smith   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3617dfb205c3SBarry Smith   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3618dfb205c3SBarry Smith   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3619dfb205c3SBarry Smith   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
36209a43d2d5SJed Brown   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
362127f91139SJed Brown   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
362227f91139SJed Brown   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3623d47bf9aaSJed Brown   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3624dfb205c3SBarry Smith   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3625d47bf9aaSJed Brown   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3626dfb205c3SBarry Smith   PetscFunctionReturn(0);
3627dfb205c3SBarry Smith }
3628e561ad89SHong Zhang 
3629bd153df0SHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3630e561ad89SHong Zhang {
3631e561ad89SHong Zhang   PetscErrorCode ierr;
3632bd153df0SHong Zhang   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3633bd153df0SHong Zhang   PetscInt       *indx;
3634bd153df0SHong Zhang   PetscScalar    *values;
3635e561ad89SHong Zhang 
3636e561ad89SHong Zhang   PetscFunctionBegin;
3637e561ad89SHong Zhang   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3638bd153df0SHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3639bd153df0SHong Zhang     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
36402c6ba4edSHong Zhang     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3641bd153df0SHong Zhang     PetscInt       *bindx,rmax=a->rmax,j;
364277f764caSHong Zhang     PetscMPIInt    rank,size;
3643e561ad89SHong Zhang 
3644bd153df0SHong Zhang     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3645bd153df0SHong Zhang     mbs = m/bs; Nbs = N/cbs;
3646bd153df0SHong Zhang     if (n == PETSC_DECIDE) {
3647da91a574SPierre Jolivet       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3648bd153df0SHong Zhang     }
3649da91a574SPierre Jolivet     nbs = n/cbs;
3650e561ad89SHong Zhang 
3651647a6520SHong Zhang     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
365277f764caSHong Zhang     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
365377f764caSHong Zhang 
365477f764caSHong Zhang     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
365577f764caSHong Zhang     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
365677f764caSHong Zhang     if (rank == size-1) {
365777f764caSHong Zhang       /* Check sum(nbs) = Nbs */
36582c6ba4edSHong Zhang       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
365977f764caSHong Zhang     }
366077f764caSHong Zhang 
366177f764caSHong Zhang     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3662bd153df0SHong Zhang     for (i=0; i<mbs; i++) {
3663647a6520SHong Zhang       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3664647a6520SHong Zhang       nnz = nnz/bs;
3665647a6520SHong Zhang       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3666647a6520SHong Zhang       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3667647a6520SHong Zhang       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3668e561ad89SHong Zhang     }
3669647a6520SHong Zhang     ierr = PetscFree(bindx);CHKERRQ(ierr);
3670e561ad89SHong Zhang 
3671e561ad89SHong Zhang     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
367277f764caSHong Zhang     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3673e561ad89SHong Zhang     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
36748761c3d6SHong Zhang     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
36758761c3d6SHong Zhang     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3676e561ad89SHong Zhang     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3677e561ad89SHong Zhang     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3678e561ad89SHong Zhang   }
3679e561ad89SHong Zhang 
3680bd153df0SHong Zhang   /* numeric phase */
3681647a6520SHong Zhang   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3682bd153df0SHong Zhang   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3683e561ad89SHong Zhang 
3684e561ad89SHong Zhang   for (i=0; i<m; i++) {
3685e561ad89SHong Zhang     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3686e561ad89SHong Zhang     Ii   = i + rstart;
3687bd153df0SHong Zhang     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3688e561ad89SHong Zhang     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3689e561ad89SHong Zhang   }
3690bd153df0SHong Zhang   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3691bd153df0SHong Zhang   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692e561ad89SHong Zhang   PetscFunctionReturn(0);
3693e561ad89SHong Zhang }
3694