xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision a873a8cd69acc6fd9b12ad3d6b30ee1bf0a81da9)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode    ierr;
15   PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
16   PetscScalar       *va,*vv;
17   Vec               vB,vA;
18   const PetscScalar *vb;
19 
20   PetscFunctionBegin;
21   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
22   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
23 
24   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
25   if (idx) {
26     for (i=0; i<m; i++) {
27       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
28     }
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
32   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
33   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
34 
35   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
36   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
37   for (i=0; i<m; i++) {
38     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
39       vv[i] = vb[i];
40       if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
41     } else {
42       vv[i] = va[i];
43       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
44         idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
45     }
46   }
47   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
48   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
49   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
50   ierr = PetscFree(idxb);CHKERRQ(ierr);
51   ierr = VecDestroy(&vA);CHKERRQ(ierr);
52   ierr = VecDestroy(&vB);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
56 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
57 {
58   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
59   PetscErrorCode ierr;
60 
61   PetscFunctionBegin;
62   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
63   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79      Local utility routine that creates a mapping from the global column
80    number to the local number in the off-diagonal part of the local
81    storage of the matrix.  This is done in a non scalable way since the
82    length of colmap equals the global matrix length.
83 */
84 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
85 {
86   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
87   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
88   PetscErrorCode ierr;
89   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
90 
91   PetscFunctionBegin;
92 #if defined(PETSC_USE_CTABLE)
93   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
94   for (i=0; i<nbs; i++) {
95     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
96   }
97 #else
98   ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
99   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
100   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
101 #endif
102   PetscFunctionReturn(0);
103 }
104 
105 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
106   { \
107     brow = row/bs;  \
108     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
109     rmax = aimax[brow]; nrow = ailen[brow]; \
110     bcol = col/bs; \
111     ridx = row % bs; cidx = col % bs; \
112     low  = 0; high = nrow; \
113     while (high-low > 3) { \
114       t = (low+high)/2; \
115       if (rp[t] > bcol) high = t; \
116       else              low  = t; \
117     } \
118     for (_i=low; _i<high; _i++) { \
119       if (rp[_i] > bcol) break; \
120       if (rp[_i] == bcol) { \
121         bap = ap +  bs2*_i + bs*cidx + ridx; \
122         if (addv == ADD_VALUES) *bap += value;  \
123         else                    *bap  = value;  \
124         goto a_noinsert; \
125       } \
126     } \
127     if (a->nonew == 1) goto a_noinsert; \
128     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
129     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
130     N = nrow++ - 1;  \
131     /* shift up all the later entries in this row */ \
132     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
133     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
134     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
135     rp[_i]                      = bcol;  \
136     ap[bs2*_i + bs*cidx + ridx] = value;  \
137 a_noinsert:; \
138     ailen[brow] = nrow; \
139   }
140 
141 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
142   { \
143     brow = row/bs;  \
144     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
145     rmax = bimax[brow]; nrow = bilen[brow]; \
146     bcol = col/bs; \
147     ridx = row % bs; cidx = col % bs; \
148     low  = 0; high = nrow; \
149     while (high-low > 3) { \
150       t = (low+high)/2; \
151       if (rp[t] > bcol) high = t; \
152       else              low  = t; \
153     } \
154     for (_i=low; _i<high; _i++) { \
155       if (rp[_i] > bcol) break; \
156       if (rp[_i] == bcol) { \
157         bap = ap +  bs2*_i + bs*cidx + ridx; \
158         if (addv == ADD_VALUES) *bap += value;  \
159         else                    *bap  = value;  \
160         goto b_noinsert; \
161       } \
162     } \
163     if (b->nonew == 1) goto b_noinsert; \
164     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
165     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
166     N = nrow++ - 1;  \
167     /* shift up all the later entries in this row */ \
168     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
169     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
170     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
171     rp[_i]                      = bcol;  \
172     ap[bs2*_i + bs*cidx + ridx] = value;  \
173 b_noinsert:; \
174     bilen[brow] = nrow; \
175   }
176 
177 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
178 {
179   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
180   MatScalar      value;
181   PetscBool      roworiented = baij->roworiented;
182   PetscErrorCode ierr;
183   PetscInt       i,j,row,col;
184   PetscInt       rstart_orig=mat->rmap->rstart;
185   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
186   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
187 
188   /* Some Variables required in the macro */
189   Mat         A     = baij->A;
190   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
191   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
192   MatScalar   *aa   =a->a;
193 
194   Mat         B     = baij->B;
195   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
196   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
197   MatScalar   *ba   =b->a;
198 
199   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
200   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
201   MatScalar *ap,*bap;
202 
203   PetscFunctionBegin;
204   for (i=0; i<m; i++) {
205     if (im[i] < 0) continue;
206     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
207     if (im[i] >= rstart_orig && im[i] < rend_orig) {
208       row = im[i] - rstart_orig;
209       for (j=0; j<n; j++) {
210         if (in[j] >= cstart_orig && in[j] < cend_orig) {
211           col = in[j] - cstart_orig;
212           if (roworiented) value = v[i*n+j];
213           else             value = v[i+j*m];
214           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
215         } else if (in[j] < 0) continue;
216         else if (PetscUnlikely(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
217         else {
218           if (mat->was_assembled) {
219             if (!baij->colmap) {
220               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
221             }
222 #if defined(PETSC_USE_CTABLE)
223             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
224             col  = col - 1;
225 #else
226             col = baij->colmap[in[j]/bs] - 1;
227 #endif
228             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
229               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
230               col  =  in[j];
231               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
232               B    = baij->B;
233               b    = (Mat_SeqBAIJ*)(B)->data;
234               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
235               ba   =b->a;
236             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
237             else col += in[j]%bs;
238           } else col = in[j];
239           if (roworiented) value = v[i*n+j];
240           else             value = v[i+j*m];
241           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
242           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
243         }
244       }
245     } else {
246       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
247       if (!baij->donotstash) {
248         mat->assembled = PETSC_FALSE;
249         if (roworiented) {
250           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
251         } else {
252           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
253         }
254       }
255     }
256   }
257   PetscFunctionReturn(0);
258 }
259 
260 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
261 {
262   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
263   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
264   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
265   PetscErrorCode    ierr;
266   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
267   PetscBool         roworiented=a->roworiented;
268   const PetscScalar *value     = v;
269   MatScalar         *ap,*aa = a->a,*bap;
270 
271   PetscFunctionBegin;
272   rp   = aj + ai[row];
273   ap   = aa + bs2*ai[row];
274   rmax = imax[row];
275   nrow = ailen[row];
276   value = v;
277   low = 0;
278   high = nrow;
279   while (high-low > 7) {
280     t = (low+high)/2;
281     if (rp[t] > col) high = t;
282     else             low  = t;
283   }
284   for (i=low; i<high; i++) {
285     if (rp[i] > col) break;
286     if (rp[i] == col) {
287       bap = ap +  bs2*i;
288       if (roworiented) {
289         if (is == ADD_VALUES) {
290           for (ii=0; ii<bs; ii++) {
291             for (jj=ii; jj<bs2; jj+=bs) {
292               bap[jj] += *value++;
293             }
294           }
295         } else {
296           for (ii=0; ii<bs; ii++) {
297             for (jj=ii; jj<bs2; jj+=bs) {
298               bap[jj] = *value++;
299             }
300           }
301         }
302       } else {
303         if (is == ADD_VALUES) {
304           for (ii=0; ii<bs; ii++,value+=bs) {
305             for (jj=0; jj<bs; jj++) {
306               bap[jj] += value[jj];
307             }
308             bap += bs;
309           }
310         } else {
311           for (ii=0; ii<bs; ii++,value+=bs) {
312             for (jj=0; jj<bs; jj++) {
313               bap[jj]  = value[jj];
314             }
315             bap += bs;
316           }
317         }
318       }
319       goto noinsert2;
320     }
321   }
322   if (nonew == 1) goto noinsert2;
323   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
324   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
325   N = nrow++ - 1; high++;
326   /* shift up all the later entries in this row */
327   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
328   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
329   rp[i] = col;
330   bap   = ap +  bs2*i;
331   if (roworiented) {
332     for (ii=0; ii<bs; ii++) {
333       for (jj=ii; jj<bs2; jj+=bs) {
334         bap[jj] = *value++;
335       }
336     }
337   } else {
338     for (ii=0; ii<bs; ii++) {
339       for (jj=0; jj<bs; jj++) {
340         *bap++ = *value++;
341       }
342     }
343   }
344   noinsert2:;
345   ailen[row] = nrow;
346   PetscFunctionReturn(0);
347 }
348 
349 /*
350     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
351     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
352 */
353 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
354 {
355   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
356   const PetscScalar *value;
357   MatScalar         *barray     = baij->barray;
358   PetscBool         roworiented = baij->roworiented;
359   PetscErrorCode    ierr;
360   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
361   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
362   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
363 
364   PetscFunctionBegin;
365   if (!barray) {
366     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
367     baij->barray = barray;
368   }
369 
370   if (roworiented) stepval = (n-1)*bs;
371   else stepval = (m-1)*bs;
372 
373   for (i=0; i<m; i++) {
374     if (im[i] < 0) continue;
375     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
376     if (im[i] >= rstart && im[i] < rend) {
377       row = im[i] - rstart;
378       for (j=0; j<n; j++) {
379         /* If NumCol = 1 then a copy is not required */
380         if ((roworiented) && (n == 1)) {
381           barray = (MatScalar*)v + i*bs2;
382         } else if ((!roworiented) && (m == 1)) {
383           barray = (MatScalar*)v + j*bs2;
384         } else { /* Here a copy is required */
385           if (roworiented) {
386             value = v + (i*(stepval+bs) + j)*bs;
387           } else {
388             value = v + (j*(stepval+bs) + i)*bs;
389           }
390           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
391             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
392             barray += bs;
393           }
394           barray -= bs2;
395         }
396 
397         if (in[j] >= cstart && in[j] < cend) {
398           col  = in[j] - cstart;
399           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
400         } else if (in[j] < 0) continue;
401         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
402         else {
403           if (mat->was_assembled) {
404             if (!baij->colmap) {
405               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
406             }
407 
408 #if defined(PETSC_USE_DEBUG)
409 #if defined(PETSC_USE_CTABLE)
410             { PetscInt data;
411               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
412               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
413             }
414 #else
415             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
416 #endif
417 #endif
418 #if defined(PETSC_USE_CTABLE)
419             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
420             col  = (col - 1)/bs;
421 #else
422             col = (baij->colmap[in[j]] - 1)/bs;
423 #endif
424             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
425               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
426               col  =  in[j];
427             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
428           } else col = in[j];
429           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
430         }
431       }
432     } else {
433       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
434       if (!baij->donotstash) {
435         if (roworiented) {
436           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
437         } else {
438           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
439         }
440       }
441     }
442   }
443   PetscFunctionReturn(0);
444 }
445 
446 #define HASH_KEY 0.6180339887
447 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
448 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
449 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
450 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
451 {
452   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
453   PetscBool      roworiented = baij->roworiented;
454   PetscErrorCode ierr;
455   PetscInt       i,j,row,col;
456   PetscInt       rstart_orig=mat->rmap->rstart;
457   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
458   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
459   PetscReal      tmp;
460   MatScalar      **HD = baij->hd,value;
461   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
462 
463   PetscFunctionBegin;
464   for (i=0; i<m; i++) {
465     if (PetscDefined(USE_DEBUG)) {
466       if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
467       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
468     }
469     row = im[i];
470     if (row >= rstart_orig && row < rend_orig) {
471       for (j=0; j<n; j++) {
472         col = in[j];
473         if (roworiented) value = v[i*n+j];
474         else             value = v[i+j*m];
475         /* Look up PetscInto the Hash Table */
476         key = (row/bs)*Nbs+(col/bs)+1;
477         h1  = HASH(size,key,tmp);
478 
479         idx = h1;
480         if (PetscDefined(USE_DEBUG)) {
481           insert_ct++;
482           total_ct++;
483           if (HT[idx] != key) {
484             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
485             if (idx == size) {
486               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
487               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
488             }
489           }
490         } else if (HT[idx] != key) {
491           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
492           if (idx == size) {
493             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
494             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
495           }
496         }
497         /* A HASH table entry is found, so insert the values at the correct address */
498         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
499         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
500       }
501     } else if (!baij->donotstash) {
502       if (roworiented) {
503         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
504       } else {
505         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
506       }
507     }
508   }
509   if (PetscDefined(USE_DEBUG)) {
510     baij->ht_total_ct  += total_ct;
511     baij->ht_insert_ct += insert_ct;
512   }
513   PetscFunctionReturn(0);
514 }
515 
516 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
517 {
518   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
519   PetscBool         roworiented = baij->roworiented;
520   PetscErrorCode    ierr;
521   PetscInt          i,j,ii,jj,row,col;
522   PetscInt          rstart=baij->rstartbs;
523   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
524   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
525   PetscReal         tmp;
526   MatScalar         **HD = baij->hd,*baij_a;
527   const PetscScalar *v_t,*value;
528   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
529 
530   PetscFunctionBegin;
531   if (roworiented) stepval = (n-1)*bs;
532   else stepval = (m-1)*bs;
533 
534   for (i=0; i<m; i++) {
535     if (PetscDefined(USE_DEBUG)) {
536       if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
537       if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
538     }
539     row = im[i];
540     v_t = v + i*nbs2;
541     if (row >= rstart && row < rend) {
542       for (j=0; j<n; j++) {
543         col = in[j];
544 
545         /* Look up into the Hash Table */
546         key = row*Nbs+col+1;
547         h1  = HASH(size,key,tmp);
548 
549         idx = h1;
550         if (PetscDefined(USE_DEBUG)) {
551           total_ct++;
552           insert_ct++;
553           if (HT[idx] != key) {
554             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
555             if (idx == size) {
556               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
557               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
558             }
559           }
560         } else if (HT[idx] != key) {
561           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
562           if (idx == size) {
563             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
564             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
565           }
566         }
567         baij_a = HD[idx];
568         if (roworiented) {
569           /*value = v + i*(stepval+bs)*bs + j*bs;*/
570           /* value = v + (i*(stepval+bs)+j)*bs; */
571           value = v_t;
572           v_t  += bs;
573           if (addv == ADD_VALUES) {
574             for (ii=0; ii<bs; ii++,value+=stepval) {
575               for (jj=ii; jj<bs2; jj+=bs) {
576                 baij_a[jj] += *value++;
577               }
578             }
579           } else {
580             for (ii=0; ii<bs; ii++,value+=stepval) {
581               for (jj=ii; jj<bs2; jj+=bs) {
582                 baij_a[jj] = *value++;
583               }
584             }
585           }
586         } else {
587           value = v + j*(stepval+bs)*bs + i*bs;
588           if (addv == ADD_VALUES) {
589             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
590               for (jj=0; jj<bs; jj++) {
591                 baij_a[jj] += *value++;
592               }
593             }
594           } else {
595             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
596               for (jj=0; jj<bs; jj++) {
597                 baij_a[jj] = *value++;
598               }
599             }
600           }
601         }
602       }
603     } else {
604       if (!baij->donotstash) {
605         if (roworiented) {
606           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
607         } else {
608           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
609         }
610       }
611     }
612   }
613   if (PetscDefined(USE_DEBUG)) {
614     baij->ht_total_ct  += total_ct;
615     baij->ht_insert_ct += insert_ct;
616   }
617   PetscFunctionReturn(0);
618 }
619 
620 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
621 {
622   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
623   PetscErrorCode ierr;
624   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
625   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
626 
627   PetscFunctionBegin;
628   for (i=0; i<m; i++) {
629     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
630     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
631     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
632       row = idxm[i] - bsrstart;
633       for (j=0; j<n; j++) {
634         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
635         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
636         if (idxn[j] >= bscstart && idxn[j] < bscend) {
637           col  = idxn[j] - bscstart;
638           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
639         } else {
640           if (!baij->colmap) {
641             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
642           }
643 #if defined(PETSC_USE_CTABLE)
644           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
645           data--;
646 #else
647           data = baij->colmap[idxn[j]/bs]-1;
648 #endif
649           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
650           else {
651             col  = data + idxn[j]%bs;
652             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
653           }
654         }
655       }
656     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
657   }
658   PetscFunctionReturn(0);
659 }
660 
661 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
662 {
663   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
664   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
665   PetscErrorCode ierr;
666   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
667   PetscReal      sum = 0.0;
668   MatScalar      *v;
669 
670   PetscFunctionBegin;
671   if (baij->size == 1) {
672     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
673   } else {
674     if (type == NORM_FROBENIUS) {
675       v  = amat->a;
676       nz = amat->nz*bs2;
677       for (i=0; i<nz; i++) {
678         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
679       }
680       v  = bmat->a;
681       nz = bmat->nz*bs2;
682       for (i=0; i<nz; i++) {
683         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
684       }
685       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
686       *nrm = PetscSqrtReal(*nrm);
687     } else if (type == NORM_1) { /* max column sum */
688       PetscReal *tmp,*tmp2;
689       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
690       ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
691       ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
692       v    = amat->a; jj = amat->j;
693       for (i=0; i<amat->nz; i++) {
694         for (j=0; j<bs; j++) {
695           col = bs*(cstart + *jj) + j; /* column index */
696           for (row=0; row<bs; row++) {
697             tmp[col] += PetscAbsScalar(*v);  v++;
698           }
699         }
700         jj++;
701       }
702       v = bmat->a; jj = bmat->j;
703       for (i=0; i<bmat->nz; i++) {
704         for (j=0; j<bs; j++) {
705           col = bs*garray[*jj] + j;
706           for (row=0; row<bs; row++) {
707             tmp[col] += PetscAbsScalar(*v); v++;
708           }
709         }
710         jj++;
711       }
712       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
713       *nrm = 0.0;
714       for (j=0; j<mat->cmap->N; j++) {
715         if (tmp2[j] > *nrm) *nrm = tmp2[j];
716       }
717       ierr = PetscFree(tmp);CHKERRQ(ierr);
718       ierr = PetscFree(tmp2);CHKERRQ(ierr);
719     } else if (type == NORM_INFINITY) { /* max row sum */
720       PetscReal *sums;
721       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
722       sum  = 0.0;
723       for (j=0; j<amat->mbs; j++) {
724         for (row=0; row<bs; row++) sums[row] = 0.0;
725         v  = amat->a + bs2*amat->i[j];
726         nz = amat->i[j+1]-amat->i[j];
727         for (i=0; i<nz; i++) {
728           for (col=0; col<bs; col++) {
729             for (row=0; row<bs; row++) {
730               sums[row] += PetscAbsScalar(*v); v++;
731             }
732           }
733         }
734         v  = bmat->a + bs2*bmat->i[j];
735         nz = bmat->i[j+1]-bmat->i[j];
736         for (i=0; i<nz; i++) {
737           for (col=0; col<bs; col++) {
738             for (row=0; row<bs; row++) {
739               sums[row] += PetscAbsScalar(*v); v++;
740             }
741           }
742         }
743         for (row=0; row<bs; row++) {
744           if (sums[row] > sum) sum = sums[row];
745         }
746       }
747       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
748       ierr = PetscFree(sums);CHKERRQ(ierr);
749     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
750   }
751   PetscFunctionReturn(0);
752 }
753 
754 /*
755   Creates the hash table, and sets the table
756   This table is created only once.
757   If new entried need to be added to the matrix
758   then the hash table has to be destroyed and
759   recreated.
760 */
761 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
762 {
763   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
764   Mat            A     = baij->A,B=baij->B;
765   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
766   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
767   PetscErrorCode ierr;
768   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
769   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
770   PetscInt       *HT,key;
771   MatScalar      **HD;
772   PetscReal      tmp;
773 #if defined(PETSC_USE_INFO)
774   PetscInt ct=0,max=0;
775 #endif
776 
777   PetscFunctionBegin;
778   if (baij->ht) PetscFunctionReturn(0);
779 
780   baij->ht_size = (PetscInt)(factor*nz);
781   ht_size       = baij->ht_size;
782 
783   /* Allocate Memory for Hash Table */
784   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
785   HD   = baij->hd;
786   HT   = baij->ht;
787 
788   /* Loop Over A */
789   for (i=0; i<a->mbs; i++) {
790     for (j=ai[i]; j<ai[i+1]; j++) {
791       row = i+rstart;
792       col = aj[j]+cstart;
793 
794       key = row*Nbs + col + 1;
795       h1  = HASH(ht_size,key,tmp);
796       for (k=0; k<ht_size; k++) {
797         if (!HT[(h1+k)%ht_size]) {
798           HT[(h1+k)%ht_size] = key;
799           HD[(h1+k)%ht_size] = a->a + j*bs2;
800           break;
801 #if defined(PETSC_USE_INFO)
802         } else {
803           ct++;
804 #endif
805         }
806       }
807 #if defined(PETSC_USE_INFO)
808       if (k> max) max = k;
809 #endif
810     }
811   }
812   /* Loop Over B */
813   for (i=0; i<b->mbs; i++) {
814     for (j=bi[i]; j<bi[i+1]; j++) {
815       row = i+rstart;
816       col = garray[bj[j]];
817       key = row*Nbs + col + 1;
818       h1  = HASH(ht_size,key,tmp);
819       for (k=0; k<ht_size; k++) {
820         if (!HT[(h1+k)%ht_size]) {
821           HT[(h1+k)%ht_size] = key;
822           HD[(h1+k)%ht_size] = b->a + j*bs2;
823           break;
824 #if defined(PETSC_USE_INFO)
825         } else {
826           ct++;
827 #endif
828         }
829       }
830 #if defined(PETSC_USE_INFO)
831       if (k> max) max = k;
832 #endif
833     }
834   }
835 
836   /* Print Summary */
837 #if defined(PETSC_USE_INFO)
838   for (i=0,j=0; i<ht_size; i++) {
839     if (HT[i]) j++;
840   }
841   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
842 #endif
843   PetscFunctionReturn(0);
844 }
845 
846 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
847 {
848   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
849   PetscErrorCode ierr;
850   PetscInt       nstash,reallocs;
851 
852   PetscFunctionBegin;
853   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
854 
855   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
856   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
857   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
858   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
859   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
860   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
864 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
865 {
866   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
867   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
868   PetscErrorCode ierr;
869   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
870   PetscInt       *row,*col;
871   PetscBool      r1,r2,r3,other_disassembled;
872   MatScalar      *val;
873   PetscMPIInt    n;
874 
875   PetscFunctionBegin;
876   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
877   if (!baij->donotstash && !mat->nooffprocentries) {
878     while (1) {
879       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
880       if (!flg) break;
881 
882       for (i=0; i<n;) {
883         /* Now identify the consecutive vals belonging to the same row */
884         for (j=i,rstart=row[j]; j<n; j++) {
885           if (row[j] != rstart) break;
886         }
887         if (j < n) ncols = j-i;
888         else       ncols = n-i;
889         /* Now assemble all these values with a single function call */
890         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
891         i    = j;
892       }
893     }
894     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
895     /* Now process the block-stash. Since the values are stashed column-oriented,
896        set the roworiented flag to column oriented, and after MatSetValues()
897        restore the original flags */
898     r1 = baij->roworiented;
899     r2 = a->roworiented;
900     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
901 
902     baij->roworiented = PETSC_FALSE;
903     a->roworiented    = PETSC_FALSE;
904 
905     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
906     while (1) {
907       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
908       if (!flg) break;
909 
910       for (i=0; i<n;) {
911         /* Now identify the consecutive vals belonging to the same row */
912         for (j=i,rstart=row[j]; j<n; j++) {
913           if (row[j] != rstart) break;
914         }
915         if (j < n) ncols = j-i;
916         else       ncols = n-i;
917         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
918         i    = j;
919       }
920     }
921     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
922 
923     baij->roworiented = r1;
924     a->roworiented    = r2;
925 
926     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
927   }
928 
929   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
930   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
931 
932   /* determine if any processor has disassembled, if so we must
933      also disassemble ourselfs, in order that we may reassemble. */
934   /*
935      if nonzero structure of submatrix B cannot change then we know that
936      no processor disassembled thus we can skip this stuff
937   */
938   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
939     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
940     if (mat->was_assembled && !other_disassembled) {
941       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
942     }
943   }
944 
945   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
946     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
947   }
948   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
949   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
950 
951 #if defined(PETSC_USE_INFO)
952   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
953     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
954 
955     baij->ht_total_ct  = 0;
956     baij->ht_insert_ct = 0;
957   }
958 #endif
959   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
960     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
961 
962     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
963     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
964   }
965 
966   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
967 
968   baij->rowvalues = NULL;
969 
970   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
971   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
972     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
973     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
974   }
975   PetscFunctionReturn(0);
976 }
977 
978 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
979 #include <petscdraw.h>
980 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
981 {
982   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
983   PetscErrorCode    ierr;
984   PetscMPIInt       rank = baij->rank;
985   PetscInt          bs   = mat->rmap->bs;
986   PetscBool         iascii,isdraw;
987   PetscViewer       sviewer;
988   PetscViewerFormat format;
989 
990   PetscFunctionBegin;
991   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
992   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
993   if (iascii) {
994     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
995     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
996       MatInfo info;
997       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
998       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
999       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1000       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
1001                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
1002       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1003       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1004       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1005       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1006       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1007       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1008       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1009       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1010       PetscFunctionReturn(0);
1011     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1012       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1013       PetscFunctionReturn(0);
1014     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1015       PetscFunctionReturn(0);
1016     }
1017   }
1018 
1019   if (isdraw) {
1020     PetscDraw draw;
1021     PetscBool isnull;
1022     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1023     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1024     if (isnull) PetscFunctionReturn(0);
1025   }
1026 
1027   {
1028     /* assemble the entire matrix onto first processor. */
1029     Mat         A;
1030     Mat_SeqBAIJ *Aloc;
1031     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1032     MatScalar   *a;
1033     const char  *matname;
1034 
1035     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1036     /* Perhaps this should be the type of mat? */
1037     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1038     if (!rank) {
1039       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1040     } else {
1041       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1042     }
1043     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1044     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1045     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1046     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1047 
1048     /* copy over the A part */
1049     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1050     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1051     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1052 
1053     for (i=0; i<mbs; i++) {
1054       rvals[0] = bs*(baij->rstartbs + i);
1055       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1056       for (j=ai[i]; j<ai[i+1]; j++) {
1057         col = (baij->cstartbs+aj[j])*bs;
1058         for (k=0; k<bs; k++) {
1059           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1060           col++; a += bs;
1061         }
1062       }
1063     }
1064     /* copy over the B part */
1065     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1066     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1067     for (i=0; i<mbs; i++) {
1068       rvals[0] = bs*(baij->rstartbs + i);
1069       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1070       for (j=ai[i]; j<ai[i+1]; j++) {
1071         col = baij->garray[aj[j]]*bs;
1072         for (k=0; k<bs; k++) {
1073           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1074           col++; a += bs;
1075         }
1076       }
1077     }
1078     ierr = PetscFree(rvals);CHKERRQ(ierr);
1079     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1080     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1081     /*
1082        Everyone has to call to draw the matrix since the graphics waits are
1083        synchronized across all processors that share the PetscDraw object
1084     */
1085     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1086     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1087     if (!rank) {
1088       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1089       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1090     }
1091     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1092     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1093     ierr = MatDestroy(&A);CHKERRQ(ierr);
1094   }
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1099 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1100 {
1101   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
1102   Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
1103   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
1104   const PetscInt *garray = aij->garray;
1105   PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
1106   PetscInt       *rowlens,*colidxs;
1107   PetscScalar    *matvals;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1112 
1113   M  = mat->rmap->N;
1114   N  = mat->cmap->N;
1115   m  = mat->rmap->n;
1116   rs = mat->rmap->rstart;
1117   cs = mat->cmap->rstart;
1118   bs = mat->rmap->bs;
1119   nz = bs*bs*(A->nz + B->nz);
1120 
1121   /* write matrix header */
1122   header[0] = MAT_FILE_CLASSID;
1123   header[1] = M; header[2] = N; header[3] = nz;
1124   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1125   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1126 
1127   /* fill in and store row lengths */
1128   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1129   for (cnt=0, i=0; i<A->mbs; i++)
1130     for (j=0; j<bs; j++)
1131       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
1132   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1133   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1134 
1135   /* fill in and store column indices */
1136   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1137   for (cnt=0, i=0; i<A->mbs; i++) {
1138     for (k=0; k<bs; k++) {
1139       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1140         if (garray[B->j[jb]] > cs/bs) break;
1141         for (l=0; l<bs; l++)
1142           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1143       }
1144       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1145         for (l=0; l<bs; l++)
1146           colidxs[cnt++] = bs*A->j[ja] + l + cs;
1147       for (; jb<B->i[i+1]; jb++)
1148         for (l=0; l<bs; l++)
1149           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1150     }
1151   }
1152   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1153   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
1154   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1155 
1156   /* fill in and store nonzero values */
1157   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1158   for (cnt=0, i=0; i<A->mbs; i++) {
1159     for (k=0; k<bs; k++) {
1160       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1161         if (garray[B->j[jb]] > cs/bs) break;
1162         for (l=0; l<bs; l++)
1163           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1164       }
1165       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1166         for (l=0; l<bs; l++)
1167           matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
1168       for (; jb<B->i[i+1]; jb++)
1169         for (l=0; l<bs; l++)
1170           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1171     }
1172   }
1173   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
1174   ierr = PetscFree(matvals);CHKERRQ(ierr);
1175 
1176   /* write block size option to the viewer's .info file */
1177   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1182 {
1183   PetscErrorCode ierr;
1184   PetscBool      iascii,isdraw,issocket,isbinary;
1185 
1186   PetscFunctionBegin;
1187   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1188   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1189   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1190   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1191   if (iascii || isdraw || issocket) {
1192     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1193   } else if (isbinary) {
1194     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1195   }
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1200 {
1201   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205 #if defined(PETSC_USE_LOG)
1206   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1207 #endif
1208   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1209   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1210   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1211   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1212 #if defined(PETSC_USE_CTABLE)
1213   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1214 #else
1215   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1216 #endif
1217   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1218   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1219   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1220   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1221   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1222   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1223   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1224   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1225 
1226   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1235 #if defined(PETSC_HAVE_HYPRE)
1236   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
1237 #endif
1238   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1243 {
1244   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1245   PetscErrorCode ierr;
1246   PetscInt       nt;
1247 
1248   PetscFunctionBegin;
1249   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1250   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1251   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1252   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1253   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1254   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1255   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1256   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1261 {
1262   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1268   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1274 {
1275   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279   /* do nondiagonal part */
1280   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1281   /* do local part */
1282   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1283   /* add partial results together */
1284   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1285   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1290 {
1291   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1292   PetscErrorCode ierr;
1293 
1294   PetscFunctionBegin;
1295   /* do nondiagonal part */
1296   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1297   /* do local part */
1298   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1299   /* add partial results together */
1300   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1301   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 /*
1306   This only works correctly for square matrices where the subblock A->A is the
1307    diagonal block
1308 */
1309 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1310 {
1311   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1316   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1321 {
1322   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1323   PetscErrorCode ierr;
1324 
1325   PetscFunctionBegin;
1326   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1327   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1332 {
1333   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1334   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1335   PetscErrorCode ierr;
1336   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1337   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1338   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1339 
1340   PetscFunctionBegin;
1341   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1342   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1343   mat->getrowactive = PETSC_TRUE;
1344 
1345   if (!mat->rowvalues && (idx || v)) {
1346     /*
1347         allocate enough space to hold information from the longest row.
1348     */
1349     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1350     PetscInt    max = 1,mbs = mat->mbs,tmp;
1351     for (i=0; i<mbs; i++) {
1352       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1353       if (max < tmp) max = tmp;
1354     }
1355     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1356   }
1357   lrow = row - brstart;
1358 
1359   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1360   if (!v)   {pvA = NULL; pvB = NULL;}
1361   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1362   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1363   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1364   nztot = nzA + nzB;
1365 
1366   cmap = mat->garray;
1367   if (v  || idx) {
1368     if (nztot) {
1369       /* Sort by increasing column numbers, assuming A and B already sorted */
1370       PetscInt imark = -1;
1371       if (v) {
1372         *v = v_p = mat->rowvalues;
1373         for (i=0; i<nzB; i++) {
1374           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1375           else break;
1376         }
1377         imark = i;
1378         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1379         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1380       }
1381       if (idx) {
1382         *idx = idx_p = mat->rowindices;
1383         if (imark > -1) {
1384           for (i=0; i<imark; i++) {
1385             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1386           }
1387         } else {
1388           for (i=0; i<nzB; i++) {
1389             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1390             else break;
1391           }
1392           imark = i;
1393         }
1394         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1395         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1396       }
1397     } else {
1398       if (idx) *idx = NULL;
1399       if (v)   *v   = NULL;
1400     }
1401   }
1402   *nz  = nztot;
1403   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1404   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1409 {
1410   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1411 
1412   PetscFunctionBegin;
1413   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1414   baij->getrowactive = PETSC_FALSE;
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1419 {
1420   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1421   PetscErrorCode ierr;
1422 
1423   PetscFunctionBegin;
1424   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1425   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1430 {
1431   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1432   Mat            A  = a->A,B = a->B;
1433   PetscErrorCode ierr;
1434   PetscLogDouble isend[5],irecv[5];
1435 
1436   PetscFunctionBegin;
1437   info->block_size = (PetscReal)matin->rmap->bs;
1438 
1439   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1440 
1441   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1442   isend[3] = info->memory;  isend[4] = info->mallocs;
1443 
1444   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1445 
1446   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1447   isend[3] += info->memory;  isend[4] += info->mallocs;
1448 
1449   if (flag == MAT_LOCAL) {
1450     info->nz_used      = isend[0];
1451     info->nz_allocated = isend[1];
1452     info->nz_unneeded  = isend[2];
1453     info->memory       = isend[3];
1454     info->mallocs      = isend[4];
1455   } else if (flag == MAT_GLOBAL_MAX) {
1456     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1457 
1458     info->nz_used      = irecv[0];
1459     info->nz_allocated = irecv[1];
1460     info->nz_unneeded  = irecv[2];
1461     info->memory       = irecv[3];
1462     info->mallocs      = irecv[4];
1463   } else if (flag == MAT_GLOBAL_SUM) {
1464     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1465 
1466     info->nz_used      = irecv[0];
1467     info->nz_allocated = irecv[1];
1468     info->nz_unneeded  = irecv[2];
1469     info->memory       = irecv[3];
1470     info->mallocs      = irecv[4];
1471   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1472   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1473   info->fill_ratio_needed = 0;
1474   info->factor_mallocs    = 0;
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1479 {
1480   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1481   PetscErrorCode ierr;
1482 
1483   PetscFunctionBegin;
1484   switch (op) {
1485   case MAT_NEW_NONZERO_LOCATIONS:
1486   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1487   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1488   case MAT_KEEP_NONZERO_PATTERN:
1489   case MAT_NEW_NONZERO_LOCATION_ERR:
1490     MatCheckPreallocated(A,1);
1491     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1492     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1493     break;
1494   case MAT_ROW_ORIENTED:
1495     MatCheckPreallocated(A,1);
1496     a->roworiented = flg;
1497 
1498     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1499     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1500     break;
1501   case MAT_FORCE_DIAGONAL_ENTRIES:
1502   case MAT_SORTED_FULL:
1503     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1504     break;
1505   case MAT_IGNORE_OFF_PROC_ENTRIES:
1506     a->donotstash = flg;
1507     break;
1508   case MAT_USE_HASH_TABLE:
1509     a->ht_flag = flg;
1510     a->ht_fact = 1.39;
1511     break;
1512   case MAT_SYMMETRIC:
1513   case MAT_STRUCTURALLY_SYMMETRIC:
1514   case MAT_HERMITIAN:
1515   case MAT_SUBMAT_SINGLEIS:
1516   case MAT_SYMMETRY_ETERNAL:
1517     MatCheckPreallocated(A,1);
1518     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1519     break;
1520   default:
1521     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1522   }
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1527 {
1528   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1529   Mat_SeqBAIJ    *Aloc;
1530   Mat            B;
1531   PetscErrorCode ierr;
1532   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1533   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1534   MatScalar      *a;
1535 
1536   PetscFunctionBegin;
1537   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1538     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1539     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1540     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1541     /* Do not know preallocation information, but must set block size */
1542     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1543   } else {
1544     B = *matout;
1545   }
1546 
1547   /* copy over the A part */
1548   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1549   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1550   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1551 
1552   for (i=0; i<mbs; i++) {
1553     rvals[0] = bs*(baij->rstartbs + i);
1554     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1555     for (j=ai[i]; j<ai[i+1]; j++) {
1556       col = (baij->cstartbs+aj[j])*bs;
1557       for (k=0; k<bs; k++) {
1558         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1559 
1560         col++; a += bs;
1561       }
1562     }
1563   }
1564   /* copy over the B part */
1565   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1566   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1567   for (i=0; i<mbs; i++) {
1568     rvals[0] = bs*(baij->rstartbs + i);
1569     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1570     for (j=ai[i]; j<ai[i+1]; j++) {
1571       col = baij->garray[aj[j]]*bs;
1572       for (k=0; k<bs; k++) {
1573         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1574         col++;
1575         a += bs;
1576       }
1577     }
1578   }
1579   ierr = PetscFree(rvals);CHKERRQ(ierr);
1580   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1581   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582 
1583   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1584   else {
1585     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1586   }
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1591 {
1592   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1593   Mat            a     = baij->A,b = baij->B;
1594   PetscErrorCode ierr;
1595   PetscInt       s1,s2,s3;
1596 
1597   PetscFunctionBegin;
1598   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1599   if (rr) {
1600     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1601     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1602     /* Overlap communication with computation. */
1603     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1604   }
1605   if (ll) {
1606     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1607     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1608     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1609   }
1610   /* scale  the diagonal block */
1611   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1612 
1613   if (rr) {
1614     /* Do a scatter end and then right scale the off-diagonal block */
1615     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1616     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1617   }
1618   PetscFunctionReturn(0);
1619 }
1620 
1621 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1622 {
1623   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1624   PetscInt      *lrows;
1625   PetscInt       r, len;
1626   PetscBool      cong;
1627   PetscErrorCode ierr;
1628 
1629   PetscFunctionBegin;
1630   /* get locally owned rows */
1631   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1632   /* fix right hand side if needed */
1633   if (x && b) {
1634     const PetscScalar *xx;
1635     PetscScalar       *bb;
1636 
1637     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1638     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1639     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1640     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1641     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1642   }
1643 
1644   /* actually zap the local rows */
1645   /*
1646         Zero the required rows. If the "diagonal block" of the matrix
1647      is square and the user wishes to set the diagonal we use separate
1648      code so that MatSetValues() is not called for each diagonal allocating
1649      new memory, thus calling lots of mallocs and slowing things down.
1650 
1651   */
1652   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1653   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1654   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1655   if ((diag != 0.0) && cong) {
1656     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1657   } else if (diag != 0.0) {
1658     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1659     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1660        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1661     for (r = 0; r < len; ++r) {
1662       const PetscInt row = lrows[r] + A->rmap->rstart;
1663       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1664     }
1665     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1666     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667   } else {
1668     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1669   }
1670   ierr = PetscFree(lrows);CHKERRQ(ierr);
1671 
1672   /* only change matrix nonzero state if pattern was allowed to be changed */
1673   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1674     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1675     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
1676   }
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1681 {
1682   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1683   PetscErrorCode    ierr;
1684   PetscMPIInt       n = A->rmap->n,p = 0;
1685   PetscInt          i,j,k,r,len = 0,row,col,count;
1686   PetscInt          *lrows,*owners = A->rmap->range;
1687   PetscSFNode       *rrows;
1688   PetscSF           sf;
1689   const PetscScalar *xx;
1690   PetscScalar       *bb,*mask;
1691   Vec               xmask,lmask;
1692   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1693   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1694   PetscScalar       *aa;
1695 
1696   PetscFunctionBegin;
1697   /* Create SF where leaves are input rows and roots are owned rows */
1698   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1699   for (r = 0; r < n; ++r) lrows[r] = -1;
1700   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1701   for (r = 0; r < N; ++r) {
1702     const PetscInt idx   = rows[r];
1703     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1704     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1705       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1706     }
1707     rrows[r].rank  = p;
1708     rrows[r].index = rows[r] - owners[p];
1709   }
1710   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1711   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1712   /* Collect flags for rows to be zeroed */
1713   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1714   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1715   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1716   /* Compress and put in row numbers */
1717   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1718   /* zero diagonal part of matrix */
1719   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1720   /* handle off diagonal part of matrix */
1721   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1722   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1723   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1724   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1725   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1726   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1727   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1728   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1729   if (x) {
1730     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1731     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1732     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1733     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1734   }
1735   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1736   /* remove zeroed rows of off diagonal matrix */
1737   for (i = 0; i < len; ++i) {
1738     row   = lrows[i];
1739     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1740     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1741     for (k = 0; k < count; ++k) {
1742       aa[0] = 0.0;
1743       aa   += bs;
1744     }
1745   }
1746   /* loop over all elements of off process part of matrix zeroing removed columns*/
1747   for (i = 0; i < l->B->rmap->N; ++i) {
1748     row = i/bs;
1749     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1750       for (k = 0; k < bs; ++k) {
1751         col = bs*baij->j[j] + k;
1752         if (PetscAbsScalar(mask[col])) {
1753           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1754           if (x) bb[i] -= aa[0]*xx[col];
1755           aa[0] = 0.0;
1756         }
1757       }
1758     }
1759   }
1760   if (x) {
1761     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1762     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1763   }
1764   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1765   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1766   ierr = PetscFree(lrows);CHKERRQ(ierr);
1767 
1768   /* only change matrix nonzero state if pattern was allowed to be changed */
1769   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1770     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1771     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
1772   }
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1777 {
1778   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1779   PetscErrorCode ierr;
1780 
1781   PetscFunctionBegin;
1782   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1787 
1788 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1789 {
1790   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1791   Mat            a,b,c,d;
1792   PetscBool      flg;
1793   PetscErrorCode ierr;
1794 
1795   PetscFunctionBegin;
1796   a = matA->A; b = matA->B;
1797   c = matB->A; d = matB->B;
1798 
1799   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1800   if (flg) {
1801     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1802   }
1803   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1808 {
1809   PetscErrorCode ierr;
1810   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1811   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1812 
1813   PetscFunctionBegin;
1814   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1815   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1816     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1817   } else {
1818     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1819     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1820   }
1821   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1826 {
1827   PetscErrorCode ierr;
1828 
1829   PetscFunctionBegin;
1830   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1835 {
1836   PetscErrorCode ierr;
1837   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1838   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1839   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1840 
1841   PetscFunctionBegin;
1842   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1847 {
1848   PetscErrorCode ierr;
1849   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1850   PetscBLASInt   bnz,one=1;
1851   Mat_SeqBAIJ    *x,*y;
1852   PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;
1853 
1854   PetscFunctionBegin;
1855   if (str == SAME_NONZERO_PATTERN) {
1856     PetscScalar alpha = a;
1857     x    = (Mat_SeqBAIJ*)xx->A->data;
1858     y    = (Mat_SeqBAIJ*)yy->A->data;
1859     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1860     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1861     x    = (Mat_SeqBAIJ*)xx->B->data;
1862     y    = (Mat_SeqBAIJ*)yy->B->data;
1863     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1864     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1865     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1866   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1867     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1868   } else {
1869     Mat      B;
1870     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1871     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1872     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1873     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1874     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1875     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1876     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1877     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
1878     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1879     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1880     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1881     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1882     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1883     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1884     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1885     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1886   }
1887   PetscFunctionReturn(0);
1888 }
1889 
1890 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1891 {
1892 #if defined(PETSC_USE_COMPLEX)
1893   PetscErrorCode ierr;
1894   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1895 
1896   PetscFunctionBegin;
1897   ierr = MatConjugate_SeqBAIJ(a->A);CHKERRQ(ierr);
1898   ierr = MatConjugate_SeqBAIJ(a->B);CHKERRQ(ierr);
1899 #else
1900   PetscFunctionBegin;
1901 #endif
1902   PetscFunctionReturn(0);
1903 }
1904 
1905 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1906 {
1907   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1908   PetscErrorCode ierr;
1909 
1910   PetscFunctionBegin;
1911   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1912   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1917 {
1918   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1919   PetscErrorCode ierr;
1920 
1921   PetscFunctionBegin;
1922   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1923   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1928 {
1929   PetscErrorCode ierr;
1930   IS             iscol_local;
1931   PetscInt       csize;
1932 
1933   PetscFunctionBegin;
1934   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1935   if (call == MAT_REUSE_MATRIX) {
1936     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1937     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1938   } else {
1939     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1940   }
1941   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1942   if (call == MAT_INITIAL_MATRIX) {
1943     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1944     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1945   }
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 /*
1950   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1951   in local and then by concatenating the local matrices the end result.
1952   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1953   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1954 */
1955 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1956 {
1957   PetscErrorCode ierr;
1958   PetscMPIInt    rank,size;
1959   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1960   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1961   Mat            M,Mreuse;
1962   MatScalar      *vwork,*aa;
1963   MPI_Comm       comm;
1964   IS             isrow_new, iscol_new;
1965   Mat_SeqBAIJ    *aij;
1966 
1967   PetscFunctionBegin;
1968   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
1969   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
1970   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1971   /* The compression and expansion should be avoided. Doesn't point
1972      out errors, might change the indices, hence buggey */
1973   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
1974   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
1975 
1976   if (call ==  MAT_REUSE_MATRIX) {
1977     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
1978     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1979     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
1980   } else {
1981     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
1982   }
1983   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
1984   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
1985   /*
1986       m - number of local rows
1987       n - number of columns (same on all processors)
1988       rstart - first row in new global matrix generated
1989   */
1990   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1991   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1992   m    = m/bs;
1993   n    = n/bs;
1994 
1995   if (call == MAT_INITIAL_MATRIX) {
1996     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1997     ii  = aij->i;
1998     jj  = aij->j;
1999 
2000     /*
2001         Determine the number of non-zeros in the diagonal and off-diagonal
2002         portions of the matrix in order to do correct preallocation
2003     */
2004 
2005     /* first get start and end of "diagonal" columns */
2006     if (csize == PETSC_DECIDE) {
2007       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2008       if (mglobal == n*bs) { /* square matrix */
2009         nlocal = m;
2010       } else {
2011         nlocal = n/size + ((n % size) > rank);
2012       }
2013     } else {
2014       nlocal = csize/bs;
2015     }
2016     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
2017     rstart = rend - nlocal;
2018     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2019 
2020     /* next, compute all the lengths */
2021     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2022     for (i=0; i<m; i++) {
2023       jend = ii[i+1] - ii[i];
2024       olen = 0;
2025       dlen = 0;
2026       for (j=0; j<jend; j++) {
2027         if (*jj < rstart || *jj >= rend) olen++;
2028         else dlen++;
2029         jj++;
2030       }
2031       olens[i] = olen;
2032       dlens[i] = dlen;
2033     }
2034     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2035     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2036     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2037     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2038     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2039     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2040   } else {
2041     PetscInt ml,nl;
2042 
2043     M    = *newmat;
2044     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2045     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2046     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2047     /*
2048          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2049        rather than the slower MatSetValues().
2050     */
2051     M->was_assembled = PETSC_TRUE;
2052     M->assembled     = PETSC_FALSE;
2053   }
2054   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2055   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2056   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2057   ii   = aij->i;
2058   jj   = aij->j;
2059   aa   = aij->a;
2060   for (i=0; i<m; i++) {
2061     row   = rstart/bs + i;
2062     nz    = ii[i+1] - ii[i];
2063     cwork = jj;     jj += nz;
2064     vwork = aa;     aa += nz*bs*bs;
2065     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2066   }
2067 
2068   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2069   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2070   *newmat = M;
2071 
2072   /* save submatrix used in processor for next request */
2073   if (call ==  MAT_INITIAL_MATRIX) {
2074     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2075     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2076   }
2077   PetscFunctionReturn(0);
2078 }
2079 
2080 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2081 {
2082   MPI_Comm       comm,pcomm;
2083   PetscInt       clocal_size,nrows;
2084   const PetscInt *rows;
2085   PetscMPIInt    size;
2086   IS             crowp,lcolp;
2087   PetscErrorCode ierr;
2088 
2089   PetscFunctionBegin;
2090   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2091   /* make a collective version of 'rowp' */
2092   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2093   if (pcomm==comm) {
2094     crowp = rowp;
2095   } else {
2096     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2097     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2098     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2099     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2100   }
2101   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2102   /* make a local version of 'colp' */
2103   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2104   ierr = MPI_Comm_size(pcomm,&size);CHKERRMPI(ierr);
2105   if (size==1) {
2106     lcolp = colp;
2107   } else {
2108     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2109   }
2110   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2111   /* now we just get the submatrix */
2112   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2113   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2114   /* clean up */
2115   if (pcomm!=comm) {
2116     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2117   }
2118   if (size>1) {
2119     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2120   }
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2125 {
2126   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2127   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2128 
2129   PetscFunctionBegin;
2130   if (nghosts) *nghosts = B->nbs;
2131   if (ghosts) *ghosts = baij->garray;
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2136 {
2137   Mat            B;
2138   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2139   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2140   Mat_SeqAIJ     *b;
2141   PetscErrorCode ierr;
2142   PetscMPIInt    size,rank,*recvcounts = NULL,*displs = NULL;
2143   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2144   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2145 
2146   PetscFunctionBegin;
2147   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
2148   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
2149 
2150   /* ----------------------------------------------------------------
2151      Tell every processor the number of nonzeros per row
2152   */
2153   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2154   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2155     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2156   }
2157   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2158   displs    = recvcounts + size;
2159   for (i=0; i<size; i++) {
2160     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2161     displs[i]     = A->rmap->range[i]/bs;
2162   }
2163 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2164   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2165 #else
2166   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2167   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2168 #endif
2169   /* ---------------------------------------------------------------
2170      Create the sequential matrix of the same type as the local block diagonal
2171   */
2172   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2173   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2174   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2175   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2176   b    = (Mat_SeqAIJ*)B->data;
2177 
2178   /*--------------------------------------------------------------------
2179     Copy my part of matrix column indices over
2180   */
2181   sendcount  = ad->nz + bd->nz;
2182   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2183   a_jsendbuf = ad->j;
2184   b_jsendbuf = bd->j;
2185   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2186   cnt        = 0;
2187   for (i=0; i<n; i++) {
2188 
2189     /* put in lower diagonal portion */
2190     m = bd->i[i+1] - bd->i[i];
2191     while (m > 0) {
2192       /* is it above diagonal (in bd (compressed) numbering) */
2193       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2194       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2195       m--;
2196     }
2197 
2198     /* put in diagonal portion */
2199     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2200       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2201     }
2202 
2203     /* put in upper diagonal portion */
2204     while (m-- > 0) {
2205       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2206     }
2207   }
2208   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2209 
2210   /*--------------------------------------------------------------------
2211     Gather all column indices to all processors
2212   */
2213   for (i=0; i<size; i++) {
2214     recvcounts[i] = 0;
2215     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2216       recvcounts[i] += lens[j];
2217     }
2218   }
2219   displs[0] = 0;
2220   for (i=1; i<size; i++) {
2221     displs[i] = displs[i-1] + recvcounts[i-1];
2222   }
2223 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2224   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2225 #else
2226   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2227 #endif
2228   /*--------------------------------------------------------------------
2229     Assemble the matrix into useable form (note numerical values not yet set)
2230   */
2231   /* set the b->ilen (length of each row) values */
2232   ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
2233   /* set the b->i indices */
2234   b->i[0] = 0;
2235   for (i=1; i<=A->rmap->N/bs; i++) {
2236     b->i[i] = b->i[i-1] + lens[i-1];
2237   }
2238   ierr = PetscFree(lens);CHKERRQ(ierr);
2239   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2240   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2241   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2242 
2243   if (A->symmetric) {
2244     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2245   } else if (A->hermitian) {
2246     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2247   } else if (A->structurally_symmetric) {
2248     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2249   }
2250   *newmat = B;
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2255 {
2256   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2257   PetscErrorCode ierr;
2258   Vec            bb1 = NULL;
2259 
2260   PetscFunctionBegin;
2261   if (flag == SOR_APPLY_UPPER) {
2262     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2263     PetscFunctionReturn(0);
2264   }
2265 
2266   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2267     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2268   }
2269 
2270   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2271     if (flag & SOR_ZERO_INITIAL_GUESS) {
2272       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2273       its--;
2274     }
2275 
2276     while (its--) {
2277       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2278       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2279 
2280       /* update rhs: bb1 = bb - B*x */
2281       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2282       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2283 
2284       /* local sweep */
2285       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2286     }
2287   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2288     if (flag & SOR_ZERO_INITIAL_GUESS) {
2289       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2290       its--;
2291     }
2292     while (its--) {
2293       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2294       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2295 
2296       /* update rhs: bb1 = bb - B*x */
2297       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2298       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2299 
2300       /* local sweep */
2301       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2302     }
2303   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2304     if (flag & SOR_ZERO_INITIAL_GUESS) {
2305       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2306       its--;
2307     }
2308     while (its--) {
2309       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2310       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2311 
2312       /* update rhs: bb1 = bb - B*x */
2313       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2314       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2315 
2316       /* local sweep */
2317       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2318     }
2319   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2320 
2321   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,ReductionType type,PetscReal *reductions)
2326 {
2327   PetscErrorCode ierr;
2328   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2329   PetscInt       m,N,i,*garray = aij->garray;
2330   PetscInt       ib,jb,bs = A->rmap->bs;
2331   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2332   MatScalar      *a_val = a_aij->a;
2333   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2334   MatScalar      *b_val = b_aij->a;
2335   PetscReal      *work;
2336 
2337   PetscFunctionBegin;
2338   ierr = MatGetSize(A,&m,&N);CHKERRQ(ierr);
2339   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2340   if (type == REDUCTION_NORM_2) {
2341     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2342       for (jb=0; jb<bs; jb++) {
2343         for (ib=0; ib<bs; ib++) {
2344           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2345           a_val++;
2346         }
2347       }
2348     }
2349     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2350       for (jb=0; jb<bs; jb++) {
2351         for (ib=0; ib<bs; ib++) {
2352           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2353           b_val++;
2354         }
2355       }
2356     }
2357   } else if (type == REDUCTION_NORM_1) {
2358     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2359       for (jb=0; jb<bs; jb++) {
2360         for (ib=0; ib<bs; ib++) {
2361           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2362           a_val++;
2363         }
2364       }
2365     }
2366     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2367       for (jb=0; jb<bs; jb++) {
2368        for (ib=0; ib<bs; ib++) {
2369           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2370           b_val++;
2371         }
2372       }
2373     }
2374   } else if (type == REDUCTION_NORM_INFINITY) {
2375     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2376       for (jb=0; jb<bs; jb++) {
2377         for (ib=0; ib<bs; ib++) {
2378           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2379           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2380           a_val++;
2381         }
2382       }
2383     }
2384     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2385       for (jb=0; jb<bs; jb++) {
2386         for (ib=0; ib<bs; ib++) {
2387           int col = garray[b_aij->j[i]] * bs + jb;
2388           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2389           b_val++;
2390         }
2391       }
2392     }
2393   } else if (type == REDUCTION_SUM || REDUCTION_MEAN) {
2394     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2395       for (jb=0; jb<bs; jb++) {
2396         for (ib=0; ib<bs; ib++) {
2397           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += *a_val;
2398           a_val++;
2399         }
2400       }
2401     }
2402     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2403       for (jb=0; jb<bs; jb++) {
2404        for (ib=0; ib<bs; ib++) {
2405           work[garray[b_aij->j[i]] * bs + jb] += *b_val;
2406           b_val++;
2407         }
2408       }
2409     }
2410   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown ReductionType");
2411   if (type == REDUCTION_NORM_INFINITY) {
2412     ierr = MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2413   } else {
2414     ierr = MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2415   }
2416   ierr = PetscFree(work);CHKERRQ(ierr);
2417   if (type == REDUCTION_NORM_2) {
2418     for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2419   } else if (type == REDUCTION_MEAN) {
2420     for (i=0; i<N; i++) reductions[i] /= m;
2421   }
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2426 {
2427   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2428   PetscErrorCode ierr;
2429 
2430   PetscFunctionBegin;
2431   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2432   A->factorerrortype             = a->A->factorerrortype;
2433   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2434   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2439 {
2440   PetscErrorCode ierr;
2441   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2442   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2443 
2444   PetscFunctionBegin;
2445   if (!Y->preallocated) {
2446     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2447   } else if (!aij->nz) {
2448     PetscInt nonew = aij->nonew;
2449     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2450     aij->nonew = nonew;
2451   }
2452   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2457 {
2458   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2459   PetscErrorCode ierr;
2460 
2461   PetscFunctionBegin;
2462   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2463   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2464   if (d) {
2465     PetscInt rstart;
2466     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2467     *d += rstart/A->rmap->bs;
2468 
2469   }
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2474 {
2475   PetscFunctionBegin;
2476   *a = ((Mat_MPIBAIJ*)A->data)->A;
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 /* -------------------------------------------------------------------*/
2481 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2482                                        MatGetRow_MPIBAIJ,
2483                                        MatRestoreRow_MPIBAIJ,
2484                                        MatMult_MPIBAIJ,
2485                                 /* 4*/ MatMultAdd_MPIBAIJ,
2486                                        MatMultTranspose_MPIBAIJ,
2487                                        MatMultTransposeAdd_MPIBAIJ,
2488                                        NULL,
2489                                        NULL,
2490                                        NULL,
2491                                 /*10*/ NULL,
2492                                        NULL,
2493                                        NULL,
2494                                        MatSOR_MPIBAIJ,
2495                                        MatTranspose_MPIBAIJ,
2496                                 /*15*/ MatGetInfo_MPIBAIJ,
2497                                        MatEqual_MPIBAIJ,
2498                                        MatGetDiagonal_MPIBAIJ,
2499                                        MatDiagonalScale_MPIBAIJ,
2500                                        MatNorm_MPIBAIJ,
2501                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2502                                        MatAssemblyEnd_MPIBAIJ,
2503                                        MatSetOption_MPIBAIJ,
2504                                        MatZeroEntries_MPIBAIJ,
2505                                 /*24*/ MatZeroRows_MPIBAIJ,
2506                                        NULL,
2507                                        NULL,
2508                                        NULL,
2509                                        NULL,
2510                                 /*29*/ MatSetUp_MPIBAIJ,
2511                                        NULL,
2512                                        NULL,
2513                                        MatGetDiagonalBlock_MPIBAIJ,
2514                                        NULL,
2515                                 /*34*/ MatDuplicate_MPIBAIJ,
2516                                        NULL,
2517                                        NULL,
2518                                        NULL,
2519                                        NULL,
2520                                 /*39*/ MatAXPY_MPIBAIJ,
2521                                        MatCreateSubMatrices_MPIBAIJ,
2522                                        MatIncreaseOverlap_MPIBAIJ,
2523                                        MatGetValues_MPIBAIJ,
2524                                        MatCopy_MPIBAIJ,
2525                                 /*44*/ NULL,
2526                                        MatScale_MPIBAIJ,
2527                                        MatShift_MPIBAIJ,
2528                                        NULL,
2529                                        MatZeroRowsColumns_MPIBAIJ,
2530                                 /*49*/ NULL,
2531                                        NULL,
2532                                        NULL,
2533                                        NULL,
2534                                        NULL,
2535                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2536                                        NULL,
2537                                        MatSetUnfactored_MPIBAIJ,
2538                                        MatPermute_MPIBAIJ,
2539                                        MatSetValuesBlocked_MPIBAIJ,
2540                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2541                                        MatDestroy_MPIBAIJ,
2542                                        MatView_MPIBAIJ,
2543                                        NULL,
2544                                        NULL,
2545                                 /*64*/ NULL,
2546                                        NULL,
2547                                        NULL,
2548                                        NULL,
2549                                        NULL,
2550                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2551                                        NULL,
2552                                        NULL,
2553                                        NULL,
2554                                        NULL,
2555                                 /*74*/ NULL,
2556                                        MatFDColoringApply_BAIJ,
2557                                        NULL,
2558                                        NULL,
2559                                        NULL,
2560                                 /*79*/ NULL,
2561                                        NULL,
2562                                        NULL,
2563                                        NULL,
2564                                        MatLoad_MPIBAIJ,
2565                                 /*84*/ NULL,
2566                                        NULL,
2567                                        NULL,
2568                                        NULL,
2569                                        NULL,
2570                                 /*89*/ NULL,
2571                                        NULL,
2572                                        NULL,
2573                                        NULL,
2574                                        NULL,
2575                                 /*94*/ NULL,
2576                                        NULL,
2577                                        NULL,
2578                                        NULL,
2579                                        NULL,
2580                                 /*99*/ NULL,
2581                                        NULL,
2582                                        NULL,
2583                                        MatConjugate_MPIBAIJ,
2584                                        NULL,
2585                                 /*104*/NULL,
2586                                        MatRealPart_MPIBAIJ,
2587                                        MatImaginaryPart_MPIBAIJ,
2588                                        NULL,
2589                                        NULL,
2590                                 /*109*/NULL,
2591                                        NULL,
2592                                        NULL,
2593                                        NULL,
2594                                        MatMissingDiagonal_MPIBAIJ,
2595                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2596                                        NULL,
2597                                        MatGetGhosts_MPIBAIJ,
2598                                        NULL,
2599                                        NULL,
2600                                 /*119*/NULL,
2601                                        NULL,
2602                                        NULL,
2603                                        NULL,
2604                                        MatGetMultiProcBlock_MPIBAIJ,
2605                                 /*124*/NULL,
2606                                        MatGetColumnReductions_MPIBAIJ,
2607                                        MatInvertBlockDiagonal_MPIBAIJ,
2608                                        NULL,
2609                                        NULL,
2610                                /*129*/ NULL,
2611                                        NULL,
2612                                        NULL,
2613                                        NULL,
2614                                        NULL,
2615                                /*134*/ NULL,
2616                                        NULL,
2617                                        NULL,
2618                                        NULL,
2619                                        NULL,
2620                                /*139*/ MatSetBlockSizes_Default,
2621                                        NULL,
2622                                        NULL,
2623                                        MatFDColoringSetUp_MPIXAIJ,
2624                                        NULL,
2625                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2626 };
2627 
2628 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2629 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2630 
2631 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2632 {
2633   PetscInt       m,rstart,cstart,cend;
2634   PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
2635   const PetscInt *JJ    =NULL;
2636   PetscScalar    *values=NULL;
2637   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2638   PetscErrorCode ierr;
2639   PetscBool      nooffprocentries;
2640 
2641   PetscFunctionBegin;
2642   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2643   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2644   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2645   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2646   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2647   m      = B->rmap->n/bs;
2648   rstart = B->rmap->rstart/bs;
2649   cstart = B->cmap->rstart/bs;
2650   cend   = B->cmap->rend/bs;
2651 
2652   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2653   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2654   for (i=0; i<m; i++) {
2655     nz = ii[i+1] - ii[i];
2656     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2657     nz_max = PetscMax(nz_max,nz);
2658     dlen   = 0;
2659     olen   = 0;
2660     JJ     = jj + ii[i];
2661     for (j=0; j<nz; j++) {
2662       if (*JJ < cstart || *JJ >= cend) olen++;
2663       else dlen++;
2664       JJ++;
2665     }
2666     d_nnz[i] = dlen;
2667     o_nnz[i] = olen;
2668   }
2669   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2670   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2671 
2672   values = (PetscScalar*)V;
2673   if (!values) {
2674     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2675   }
2676   for (i=0; i<m; i++) {
2677     PetscInt          row    = i + rstart;
2678     PetscInt          ncols  = ii[i+1] - ii[i];
2679     const PetscInt    *icols = jj + ii[i];
2680     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2681       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2682       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2683     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2684       PetscInt j;
2685       for (j=0; j<ncols; j++) {
2686         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2687         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2688       }
2689     }
2690   }
2691 
2692   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2693   nooffprocentries    = B->nooffprocentries;
2694   B->nooffprocentries = PETSC_TRUE;
2695   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2696   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2697   B->nooffprocentries = nooffprocentries;
2698 
2699   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2700   PetscFunctionReturn(0);
2701 }
2702 
2703 /*@C
2704    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2705 
2706    Collective
2707 
2708    Input Parameters:
2709 +  B - the matrix
2710 .  bs - the block size
2711 .  i - the indices into j for the start of each local row (starts with zero)
2712 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2713 -  v - optional values in the matrix
2714 
2715    Level: advanced
2716 
2717    Notes:
2718     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2719    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2720    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2721    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2722    block column and the second index is over columns within a block.
2723 
2724    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2725 
2726 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2727 @*/
2728 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2729 {
2730   PetscErrorCode ierr;
2731 
2732   PetscFunctionBegin;
2733   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2734   PetscValidType(B,1);
2735   PetscValidLogicalCollectiveInt(B,bs,2);
2736   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2741 {
2742   Mat_MPIBAIJ    *b;
2743   PetscErrorCode ierr;
2744   PetscInt       i;
2745   PetscMPIInt    size;
2746 
2747   PetscFunctionBegin;
2748   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2749   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2750   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2751   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2752 
2753   if (d_nnz) {
2754     for (i=0; i<B->rmap->n/bs; i++) {
2755       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2756     }
2757   }
2758   if (o_nnz) {
2759     for (i=0; i<B->rmap->n/bs; i++) {
2760       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2761     }
2762   }
2763 
2764   b      = (Mat_MPIBAIJ*)B->data;
2765   b->bs2 = bs*bs;
2766   b->mbs = B->rmap->n/bs;
2767   b->nbs = B->cmap->n/bs;
2768   b->Mbs = B->rmap->N/bs;
2769   b->Nbs = B->cmap->N/bs;
2770 
2771   for (i=0; i<=b->size; i++) {
2772     b->rangebs[i] = B->rmap->range[i]/bs;
2773   }
2774   b->rstartbs = B->rmap->rstart/bs;
2775   b->rendbs   = B->rmap->rend/bs;
2776   b->cstartbs = B->cmap->rstart/bs;
2777   b->cendbs   = B->cmap->rend/bs;
2778 
2779 #if defined(PETSC_USE_CTABLE)
2780   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2781 #else
2782   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2783 #endif
2784   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2785   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2786   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2787 
2788   /* Because the B will have been resized we simply destroy it and create a new one each time */
2789   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2790   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2791   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2792   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2793   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2794   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2795 
2796   if (!B->preallocated) {
2797     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2798     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2799     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2800     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2801     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2802   }
2803 
2804   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2805   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2806   B->preallocated  = PETSC_TRUE;
2807   B->was_assembled = PETSC_FALSE;
2808   B->assembled     = PETSC_FALSE;
2809   PetscFunctionReturn(0);
2810 }
2811 
2812 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2813 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2814 
2815 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2816 {
2817   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2818   PetscErrorCode ierr;
2819   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2820   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2821   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2822 
2823   PetscFunctionBegin;
2824   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
2825   ii[0] = 0;
2826   for (i=0; i<M; i++) {
2827     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2828     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2829     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2830     /* remove one from count of matrix has diagonal */
2831     for (j=id[i]; j<id[i+1]; j++) {
2832       if (jd[j] == i) {ii[i+1]--;break;}
2833     }
2834   }
2835   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2836   cnt  = 0;
2837   for (i=0; i<M; i++) {
2838     for (j=io[i]; j<io[i+1]; j++) {
2839       if (garray[jo[j]] > rstart) break;
2840       jj[cnt++] = garray[jo[j]];
2841     }
2842     for (k=id[i]; k<id[i+1]; k++) {
2843       if (jd[k] != i) {
2844         jj[cnt++] = rstart + jd[k];
2845       }
2846     }
2847     for (; j<io[i+1]; j++) {
2848       jj[cnt++] = garray[jo[j]];
2849     }
2850   }
2851   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2852   PetscFunctionReturn(0);
2853 }
2854 
2855 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2856 
2857 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2858 
2859 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2860 {
2861   PetscErrorCode ierr;
2862   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2863   Mat            B;
2864   Mat_MPIAIJ     *b;
2865 
2866   PetscFunctionBegin;
2867   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2868 
2869   if (reuse == MAT_REUSE_MATRIX) {
2870     B = *newmat;
2871   } else {
2872     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2873     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2874     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2875     ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2876     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2877     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2878   }
2879   b = (Mat_MPIAIJ*) B->data;
2880 
2881   if (reuse == MAT_REUSE_MATRIX) {
2882     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
2883     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
2884   } else {
2885     ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2886     ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2887     ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2888     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2889     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2890     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2891     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2892   }
2893   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2894   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2895 
2896   if (reuse == MAT_INPLACE_MATRIX) {
2897     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
2898   } else {
2899    *newmat = B;
2900   }
2901   PetscFunctionReturn(0);
2902 }
2903 
2904 /*MC
2905    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2906 
2907    Options Database Keys:
2908 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2909 . -mat_block_size <bs> - set the blocksize used to store the matrix
2910 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2911 - -mat_use_hash_table <fact>
2912 
2913    Level: beginner
2914 
2915    Notes:
2916     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
2917     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2918 
2919 .seealso: MatCreateBAIJ
2920 M*/
2921 
2922 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2923 
2924 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2925 {
2926   Mat_MPIBAIJ    *b;
2927   PetscErrorCode ierr;
2928   PetscBool      flg = PETSC_FALSE;
2929 
2930   PetscFunctionBegin;
2931   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2932   B->data = (void*)b;
2933 
2934   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2935   B->assembled = PETSC_FALSE;
2936 
2937   B->insertmode = NOT_SET_VALUES;
2938   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
2939   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRMPI(ierr);
2940 
2941   /* build local table of row and column ownerships */
2942   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
2943 
2944   /* build cache for off array entries formed */
2945   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2946 
2947   b->donotstash  = PETSC_FALSE;
2948   b->colmap      = NULL;
2949   b->garray      = NULL;
2950   b->roworiented = PETSC_TRUE;
2951 
2952   /* stuff used in block assembly */
2953   b->barray = NULL;
2954 
2955   /* stuff used for matrix vector multiply */
2956   b->lvec  = NULL;
2957   b->Mvctx = NULL;
2958 
2959   /* stuff for MatGetRow() */
2960   b->rowindices   = NULL;
2961   b->rowvalues    = NULL;
2962   b->getrowactive = PETSC_FALSE;
2963 
2964   /* hash table stuff */
2965   b->ht           = NULL;
2966   b->hd           = NULL;
2967   b->ht_size      = 0;
2968   b->ht_flag      = PETSC_FALSE;
2969   b->ht_fact      = 0;
2970   b->ht_total_ct  = 0;
2971   b->ht_insert_ct = 0;
2972 
2973   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2974   b->ijonly = PETSC_FALSE;
2975 
2976   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
2977   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
2978   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
2979 #if defined(PETSC_HAVE_HYPRE)
2980   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
2981 #endif
2982   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2983   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2984   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2985   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2986   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2987   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2988   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
2989   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2990 
2991   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2992   ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
2993   if (flg) {
2994     PetscReal fact = 1.39;
2995     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2996     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2997     if (fact <= 1.0) fact = 1.39;
2998     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2999     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3000   }
3001   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3002   PetscFunctionReturn(0);
3003 }
3004 
3005 /*MC
3006    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3007 
3008    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3009    and MATMPIBAIJ otherwise.
3010 
3011    Options Database Keys:
3012 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3013 
3014   Level: beginner
3015 
3016 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3017 M*/
3018 
3019 /*@C
3020    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3021    (block compressed row).  For good matrix assembly performance
3022    the user should preallocate the matrix storage by setting the parameters
3023    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3024    performance can be increased by more than a factor of 50.
3025 
3026    Collective on Mat
3027 
3028    Input Parameters:
3029 +  B - the matrix
3030 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3031           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3032 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3033            submatrix  (same for all local rows)
3034 .  d_nnz - array containing the number of block nonzeros in the various block rows
3035            of the in diagonal portion of the local (possibly different for each block
3036            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3037            set it even if it is zero.
3038 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3039            submatrix (same for all local rows).
3040 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3041            off-diagonal portion of the local submatrix (possibly different for
3042            each block row) or NULL.
3043 
3044    If the *_nnz parameter is given then the *_nz parameter is ignored
3045 
3046    Options Database Keys:
3047 +   -mat_block_size - size of the blocks to use
3048 -   -mat_use_hash_table <fact>
3049 
3050    Notes:
3051    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3052    than it must be used on all processors that share the object for that argument.
3053 
3054    Storage Information:
3055    For a square global matrix we define each processor's diagonal portion
3056    to be its local rows and the corresponding columns (a square submatrix);
3057    each processor's off-diagonal portion encompasses the remainder of the
3058    local matrix (a rectangular submatrix).
3059 
3060    The user can specify preallocated storage for the diagonal part of
3061    the local submatrix with either d_nz or d_nnz (not both).  Set
3062    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3063    memory allocation.  Likewise, specify preallocated storage for the
3064    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3065 
3066    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3067    the figure below we depict these three local rows and all columns (0-11).
3068 
3069 .vb
3070            0 1 2 3 4 5 6 7 8 9 10 11
3071           --------------------------
3072    row 3  |o o o d d d o o o o  o  o
3073    row 4  |o o o d d d o o o o  o  o
3074    row 5  |o o o d d d o o o o  o  o
3075           --------------------------
3076 .ve
3077 
3078    Thus, any entries in the d locations are stored in the d (diagonal)
3079    submatrix, and any entries in the o locations are stored in the
3080    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3081    stored simply in the MATSEQBAIJ format for compressed row storage.
3082 
3083    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3084    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3085    In general, for PDE problems in which most nonzeros are near the diagonal,
3086    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3087    or you will get TERRIBLE performance; see the users' manual chapter on
3088    matrices.
3089 
3090    You can call MatGetInfo() to get information on how effective the preallocation was;
3091    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3092    You can also run with the option -info and look for messages with the string
3093    malloc in them to see if additional memory allocation was needed.
3094 
3095    Level: intermediate
3096 
3097 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3098 @*/
3099 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3100 {
3101   PetscErrorCode ierr;
3102 
3103   PetscFunctionBegin;
3104   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3105   PetscValidType(B,1);
3106   PetscValidLogicalCollectiveInt(B,bs,2);
3107   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3108   PetscFunctionReturn(0);
3109 }
3110 
3111 /*@C
3112    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3113    (block compressed row).  For good matrix assembly performance
3114    the user should preallocate the matrix storage by setting the parameters
3115    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3116    performance can be increased by more than a factor of 50.
3117 
3118    Collective
3119 
3120    Input Parameters:
3121 +  comm - MPI communicator
3122 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3123           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3124 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3125            This value should be the same as the local size used in creating the
3126            y vector for the matrix-vector product y = Ax.
3127 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3128            This value should be the same as the local size used in creating the
3129            x vector for the matrix-vector product y = Ax.
3130 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3131 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3132 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3133            submatrix  (same for all local rows)
3134 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3135            of the in diagonal portion of the local (possibly different for each block
3136            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3137            and set it even if it is zero.
3138 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3139            submatrix (same for all local rows).
3140 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3141            off-diagonal portion of the local submatrix (possibly different for
3142            each block row) or NULL.
3143 
3144    Output Parameter:
3145 .  A - the matrix
3146 
3147    Options Database Keys:
3148 +   -mat_block_size - size of the blocks to use
3149 -   -mat_use_hash_table <fact>
3150 
3151    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3152    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3153    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3154 
3155    Notes:
3156    If the *_nnz parameter is given then the *_nz parameter is ignored
3157 
3158    A nonzero block is any block that as 1 or more nonzeros in it
3159 
3160    The user MUST specify either the local or global matrix dimensions
3161    (possibly both).
3162 
3163    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3164    than it must be used on all processors that share the object for that argument.
3165 
3166    Storage Information:
3167    For a square global matrix we define each processor's diagonal portion
3168    to be its local rows and the corresponding columns (a square submatrix);
3169    each processor's off-diagonal portion encompasses the remainder of the
3170    local matrix (a rectangular submatrix).
3171 
3172    The user can specify preallocated storage for the diagonal part of
3173    the local submatrix with either d_nz or d_nnz (not both).  Set
3174    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3175    memory allocation.  Likewise, specify preallocated storage for the
3176    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3177 
3178    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3179    the figure below we depict these three local rows and all columns (0-11).
3180 
3181 .vb
3182            0 1 2 3 4 5 6 7 8 9 10 11
3183           --------------------------
3184    row 3  |o o o d d d o o o o  o  o
3185    row 4  |o o o d d d o o o o  o  o
3186    row 5  |o o o d d d o o o o  o  o
3187           --------------------------
3188 .ve
3189 
3190    Thus, any entries in the d locations are stored in the d (diagonal)
3191    submatrix, and any entries in the o locations are stored in the
3192    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3193    stored simply in the MATSEQBAIJ format for compressed row storage.
3194 
3195    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3196    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3197    In general, for PDE problems in which most nonzeros are near the diagonal,
3198    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3199    or you will get TERRIBLE performance; see the users' manual chapter on
3200    matrices.
3201 
3202    Level: intermediate
3203 
3204 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3205 @*/
3206 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3207 {
3208   PetscErrorCode ierr;
3209   PetscMPIInt    size;
3210 
3211   PetscFunctionBegin;
3212   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3213   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3214   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3215   if (size > 1) {
3216     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3217     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3218   } else {
3219     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3220     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3221   }
3222   PetscFunctionReturn(0);
3223 }
3224 
3225 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3226 {
3227   Mat            mat;
3228   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3229   PetscErrorCode ierr;
3230   PetscInt       len=0;
3231 
3232   PetscFunctionBegin;
3233   *newmat = NULL;
3234   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3235   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3236   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3237 
3238   mat->factortype   = matin->factortype;
3239   mat->preallocated = PETSC_TRUE;
3240   mat->assembled    = PETSC_TRUE;
3241   mat->insertmode   = NOT_SET_VALUES;
3242 
3243   a             = (Mat_MPIBAIJ*)mat->data;
3244   mat->rmap->bs = matin->rmap->bs;
3245   a->bs2        = oldmat->bs2;
3246   a->mbs        = oldmat->mbs;
3247   a->nbs        = oldmat->nbs;
3248   a->Mbs        = oldmat->Mbs;
3249   a->Nbs        = oldmat->Nbs;
3250 
3251   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3252   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3253 
3254   a->size         = oldmat->size;
3255   a->rank         = oldmat->rank;
3256   a->donotstash   = oldmat->donotstash;
3257   a->roworiented  = oldmat->roworiented;
3258   a->rowindices   = NULL;
3259   a->rowvalues    = NULL;
3260   a->getrowactive = PETSC_FALSE;
3261   a->barray       = NULL;
3262   a->rstartbs     = oldmat->rstartbs;
3263   a->rendbs       = oldmat->rendbs;
3264   a->cstartbs     = oldmat->cstartbs;
3265   a->cendbs       = oldmat->cendbs;
3266 
3267   /* hash table stuff */
3268   a->ht           = NULL;
3269   a->hd           = NULL;
3270   a->ht_size      = 0;
3271   a->ht_flag      = oldmat->ht_flag;
3272   a->ht_fact      = oldmat->ht_fact;
3273   a->ht_total_ct  = 0;
3274   a->ht_insert_ct = 0;
3275 
3276   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
3277   if (oldmat->colmap) {
3278 #if defined(PETSC_USE_CTABLE)
3279     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3280 #else
3281     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
3282     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3283     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
3284 #endif
3285   } else a->colmap = NULL;
3286 
3287   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3288     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3289     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3290     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
3291   } else a->garray = NULL;
3292 
3293   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3294   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3295   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3296   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3297   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3298 
3299   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3300   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3301   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3302   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3303   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3304   *newmat = mat;
3305   PetscFunctionReturn(0);
3306 }
3307 
3308 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3309 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
3310 {
3311   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3312   PetscInt       *rowidxs,*colidxs,rs,cs,ce;
3313   PetscScalar    *matvals;
3314   PetscErrorCode ierr;
3315 
3316   PetscFunctionBegin;
3317   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3318 
3319   /* read in matrix header */
3320   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3321   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3322   M  = header[1]; N = header[2]; nz = header[3];
3323   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3324   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3325   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
3326 
3327   /* set block sizes from the viewer's .info file */
3328   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3329   /* set local sizes if not set already */
3330   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3331   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3332   /* set global sizes if not set already */
3333   if (mat->rmap->N < 0) mat->rmap->N = M;
3334   if (mat->cmap->N < 0) mat->cmap->N = N;
3335   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3336   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3337 
3338   /* check if the matrix sizes are correct */
3339   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3340   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3341   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3342   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3343   ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);CHKERRQ(ierr);
3344   ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);CHKERRQ(ierr);
3345   mbs = m/bs; nbs = n/bs;
3346 
3347   /* read in row lengths and build row indices */
3348   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3349   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3350   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3351   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
3352   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3353 
3354   /* read in column indices and matrix values */
3355   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3356   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3357   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3358 
3359   { /* preallocate matrix storage */
3360     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3361     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3362     PetscBool  sbaij,done;
3363     PetscInt   *d_nnz,*o_nnz;
3364 
3365     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3366     ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
3367     ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
3368     ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
3369     for (i=0; i<mbs; i++) {
3370       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3371       ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
3372       for (k=0; k<bs; k++) {
3373         PetscInt row = bs*i + k;
3374         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3375           PetscInt col = colidxs[j];
3376           if (!sbaij || col >= row) {
3377             if (col >= cs && col < ce) {
3378               if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
3379             } else {
3380               ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
3381               if (done) o_nnz[i]++;
3382             }
3383           }
3384         }
3385       }
3386     }
3387     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3388     ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
3389     ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3390     ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3391     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3392   }
3393 
3394   /* store matrix values */
3395   for (i=0; i<m; i++) {
3396     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
3397     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3398   }
3399 
3400   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3401   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3402   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3403   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3404   PetscFunctionReturn(0);
3405 }
3406 
3407 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3408 {
3409   PetscErrorCode ierr;
3410   PetscBool      isbinary;
3411 
3412   PetscFunctionBegin;
3413   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3414   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3415   ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3416   PetscFunctionReturn(0);
3417 }
3418 
3419 /*@
3420    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3421 
3422    Input Parameters:
3423 +  mat  - the matrix
3424 -  fact - factor
3425 
3426    Not Collective, each process can use a different factor
3427 
3428    Level: advanced
3429 
3430   Notes:
3431    This can also be set by the command line option: -mat_use_hash_table <fact>
3432 
3433 .seealso: MatSetOption()
3434 @*/
3435 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3436 {
3437   PetscErrorCode ierr;
3438 
3439   PetscFunctionBegin;
3440   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3441   PetscFunctionReturn(0);
3442 }
3443 
3444 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3445 {
3446   Mat_MPIBAIJ *baij;
3447 
3448   PetscFunctionBegin;
3449   baij          = (Mat_MPIBAIJ*)mat->data;
3450   baij->ht_fact = fact;
3451   PetscFunctionReturn(0);
3452 }
3453 
3454 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3455 {
3456   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3457   PetscBool      flg;
3458   PetscErrorCode ierr;
3459 
3460   PetscFunctionBegin;
3461   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3462   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3463   if (Ad)     *Ad     = a->A;
3464   if (Ao)     *Ao     = a->B;
3465   if (colmap) *colmap = a->garray;
3466   PetscFunctionReturn(0);
3467 }
3468 
3469 /*
3470     Special version for direct calls from Fortran (to eliminate two function call overheads
3471 */
3472 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3473 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3474 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3475 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3476 #endif
3477 
3478 /*@C
3479   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3480 
3481   Collective on Mat
3482 
3483   Input Parameters:
3484 + mat - the matrix
3485 . min - number of input rows
3486 . im - input rows
3487 . nin - number of input columns
3488 . in - input columns
3489 . v - numerical values input
3490 - addvin - INSERT_VALUES or ADD_VALUES
3491 
3492   Notes:
3493     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3494 
3495   Level: advanced
3496 
3497 .seealso:   MatSetValuesBlocked()
3498 @*/
3499 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3500 {
3501   /* convert input arguments to C version */
3502   Mat        mat  = *matin;
3503   PetscInt   m    = *min, n = *nin;
3504   InsertMode addv = *addvin;
3505 
3506   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3507   const MatScalar *value;
3508   MatScalar       *barray     = baij->barray;
3509   PetscBool       roworiented = baij->roworiented;
3510   PetscErrorCode  ierr;
3511   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3512   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3513   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3514 
3515   PetscFunctionBegin;
3516   /* tasks normally handled by MatSetValuesBlocked() */
3517   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3518   else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3519   if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3520   if (mat->assembled) {
3521     mat->was_assembled = PETSC_TRUE;
3522     mat->assembled     = PETSC_FALSE;
3523   }
3524   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3525 
3526   if (!barray) {
3527     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3528     baij->barray = barray;
3529   }
3530 
3531   if (roworiented) stepval = (n-1)*bs;
3532   else stepval = (m-1)*bs;
3533 
3534   for (i=0; i<m; i++) {
3535     if (im[i] < 0) continue;
3536     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3537     if (im[i] >= rstart && im[i] < rend) {
3538       row = im[i] - rstart;
3539       for (j=0; j<n; j++) {
3540         /* If NumCol = 1 then a copy is not required */
3541         if ((roworiented) && (n == 1)) {
3542           barray = (MatScalar*)v + i*bs2;
3543         } else if ((!roworiented) && (m == 1)) {
3544           barray = (MatScalar*)v + j*bs2;
3545         } else { /* Here a copy is required */
3546           if (roworiented) {
3547             value = v + i*(stepval+bs)*bs + j*bs;
3548           } else {
3549             value = v + j*(stepval+bs)*bs + i*bs;
3550           }
3551           for (ii=0; ii<bs; ii++,value+=stepval) {
3552             for (jj=0; jj<bs; jj++) {
3553               *barray++ = *value++;
3554             }
3555           }
3556           barray -=bs2;
3557         }
3558 
3559         if (in[j] >= cstart && in[j] < cend) {
3560           col  = in[j] - cstart;
3561           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3562         } else if (in[j] < 0) continue;
3563         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3564         else {
3565           if (mat->was_assembled) {
3566             if (!baij->colmap) {
3567               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3568             }
3569 
3570 #if defined(PETSC_USE_DEBUG)
3571 #if defined(PETSC_USE_CTABLE)
3572             { PetscInt data;
3573               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3574               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3575             }
3576 #else
3577             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3578 #endif
3579 #endif
3580 #if defined(PETSC_USE_CTABLE)
3581             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3582             col  = (col - 1)/bs;
3583 #else
3584             col = (baij->colmap[in[j]] - 1)/bs;
3585 #endif
3586             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3587               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3588               col  =  in[j];
3589             }
3590           } else col = in[j];
3591           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3592         }
3593       }
3594     } else {
3595       if (!baij->donotstash) {
3596         if (roworiented) {
3597           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3598         } else {
3599           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3600         }
3601       }
3602     }
3603   }
3604 
3605   /* task normally handled by MatSetValuesBlocked() */
3606   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3607   PetscFunctionReturn(0);
3608 }
3609 
3610 /*@
3611      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3612          CSR format the local rows.
3613 
3614    Collective
3615 
3616    Input Parameters:
3617 +  comm - MPI communicator
3618 .  bs - the block size, only a block size of 1 is supported
3619 .  m - number of local rows (Cannot be PETSC_DECIDE)
3620 .  n - This value should be the same as the local size used in creating the
3621        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3622        calculated if N is given) For square matrices n is almost always m.
3623 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3624 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3625 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3626 .   j - column indices
3627 -   a - matrix values
3628 
3629    Output Parameter:
3630 .   mat - the matrix
3631 
3632    Level: intermediate
3633 
3634    Notes:
3635        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3636      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3637      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3638 
3639      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3640      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3641      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3642      with column-major ordering within blocks.
3643 
3644        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3645 
3646 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3647           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3648 @*/
3649 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3650 {
3651   PetscErrorCode ierr;
3652 
3653   PetscFunctionBegin;
3654   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3655   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3656   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3657   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3658   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3659   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
3660   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3661   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3662   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3663   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3664   PetscFunctionReturn(0);
3665 }
3666 
3667 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3668 {
3669   PetscErrorCode ierr;
3670   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3671   PetscInt       *indx;
3672   PetscScalar    *values;
3673 
3674   PetscFunctionBegin;
3675   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3676   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3677     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3678     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3679     PetscInt       *bindx,rmax=a->rmax,j;
3680     PetscMPIInt    rank,size;
3681 
3682     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3683     mbs = m/bs; Nbs = N/cbs;
3684     if (n == PETSC_DECIDE) {
3685       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr);
3686     }
3687     nbs = n/cbs;
3688 
3689     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3690     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3691 
3692     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3693     ierr = MPI_Comm_rank(comm,&size);CHKERRMPI(ierr);
3694     if (rank == size-1) {
3695       /* Check sum(nbs) = Nbs */
3696       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3697     }
3698 
3699     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3700     for (i=0; i<mbs; i++) {
3701       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3702       nnz = nnz/bs;
3703       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3704       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3705       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3706     }
3707     ierr = PetscFree(bindx);CHKERRQ(ierr);
3708 
3709     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3710     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3711     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3712     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3713     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3714     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3715     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3716     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3717   }
3718 
3719   /* numeric phase */
3720   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3721   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3722 
3723   for (i=0; i<m; i++) {
3724     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3725     Ii   = i + rstart;
3726     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3727     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3728   }
3729   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3730   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3731   PetscFunctionReturn(0);
3732 }
3733