xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 40cbb1a031ea8f2be4fe2b92dc842b003ad37be3)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
14   PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
15   PetscScalar       *va,*vv;
16   Vec               vB,vA;
17   const PetscScalar *vb;
18 
19   PetscFunctionBegin;
20   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
21   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
22 
23   PetscCall(VecGetArrayWrite(vA,&va));
24   if (idx) {
25     for (i=0; i<m; i++) {
26       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
27     }
28   }
29 
30   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
31   PetscCall(PetscMalloc1(m,&idxb));
32   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
33 
34   PetscCall(VecGetArrayWrite(v,&vv));
35   PetscCall(VecGetArrayRead(vB,&vb));
36   for (i=0; i<m; i++) {
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
38       vv[i] = vb[i];
39       if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
40     } else {
41       vv[i] = va[i];
42       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
43         idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
44     }
45   }
46   PetscCall(VecRestoreArrayWrite(vA,&vv));
47   PetscCall(VecRestoreArrayWrite(vA,&va));
48   PetscCall(VecRestoreArrayRead(vB,&vb));
49   PetscCall(PetscFree(idxb));
50   PetscCall(VecDestroy(&vA));
51   PetscCall(VecDestroy(&vB));
52   PetscFunctionReturn(0);
53 }
54 
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58 
59   PetscFunctionBegin;
60   PetscCall(MatStoreValues(aij->A));
61   PetscCall(MatStoreValues(aij->B));
62   PetscFunctionReturn(0);
63 }
64 
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
68 
69   PetscFunctionBegin;
70   PetscCall(MatRetrieveValues(aij->A));
71   PetscCall(MatRetrieveValues(aij->B));
72   PetscFunctionReturn(0);
73 }
74 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
81 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
82 {
83   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
84   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
85   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
86 
87   PetscFunctionBegin;
88 #if defined(PETSC_USE_CTABLE)
89   PetscCall(PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap));
90   for (i=0; i<nbs; i++) {
91     PetscCall(PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES));
92   }
93 #else
94   PetscCall(PetscCalloc1(baij->Nbs+1,&baij->colmap));
95   PetscCall(PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt)));
96   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
97 #endif
98   PetscFunctionReturn(0);
99 }
100 
/*
   MatSetValues_SeqBAIJ_A_Private - Statement macro that inserts or adds one
   scalar `value` at point position (row,col) -- local indices into the
   diagonal block A.  It locates the block column with a short binary search
   (window narrowed to <= 3) followed by a linear scan; if the block is absent
   and a->nonew permits insertion, it reallocates and shifts the row to open a
   slot, zeroing the new block.  orow/ocol are the original global indices and
   are used only in the error message.  Relies on the local variables declared
   in MatSetValues_MPIBAIJ() (rp, ap, bap, nrow, rmax, low, high, t, _i, brow,
   bcol, ridx, cidx, N, bs, bs2, aa, ai, aj, aimax, ailen).
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
136 
/*
   MatSetValues_SeqBAIJ_B_Private - Identical in structure to
   MatSetValues_SeqBAIJ_A_Private() above, but operates on the off-diagonal
   block B (arrays ba, bi, bj, bimax, bilen and struct pointer b).  See the
   comment on the A variant for the algorithm and the local variables it
   expects to find in the enclosing function.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)));\
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
172 
173 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
174 {
175   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
176   MatScalar      value;
177   PetscBool      roworiented = baij->roworiented;
178   PetscInt       i,j,row,col;
179   PetscInt       rstart_orig=mat->rmap->rstart;
180   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
181   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
182 
183   /* Some Variables required in the macro */
184   Mat         A     = baij->A;
185   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
186   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
187   MatScalar   *aa   =a->a;
188 
189   Mat         B     = baij->B;
190   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
191   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
192   MatScalar   *ba   =b->a;
193 
194   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
195   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
196   MatScalar *ap,*bap;
197 
198   PetscFunctionBegin;
199   for (i=0; i<m; i++) {
200     if (im[i] < 0) continue;
201     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
202     if (im[i] >= rstart_orig && im[i] < rend_orig) {
203       row = im[i] - rstart_orig;
204       for (j=0; j<n; j++) {
205         if (in[j] >= cstart_orig && in[j] < cend_orig) {
206           col = in[j] - cstart_orig;
207           if (roworiented) value = v[i*n+j];
208           else             value = v[i+j*m];
209           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
210         } else if (in[j] < 0) continue;
211         else PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
212         else {
213           if (mat->was_assembled) {
214             if (!baij->colmap) {
215               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
216             }
217 #if defined(PETSC_USE_CTABLE)
218             PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col));
219             col  = col - 1;
220 #else
221             col = baij->colmap[in[j]/bs] - 1;
222 #endif
223             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
224               PetscCall(MatDisAssemble_MPIBAIJ(mat));
225               col  =  in[j];
226               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
227               B    = baij->B;
228               b    = (Mat_SeqBAIJ*)(B)->data;
229               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
230               ba   =b->a;
231             } else PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
232             else col += in[j]%bs;
233           } else col = in[j];
234           if (roworiented) value = v[i*n+j];
235           else             value = v[i+j*m];
236           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
237           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
238         }
239       }
240     } else {
241       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
242       if (!baij->donotstash) {
243         mat->assembled = PETSC_FALSE;
244         if (roworiented) {
245           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE));
246         } else {
247           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE));
248         }
249       }
250     }
251   }
252   PetscFunctionReturn(0);
253 }
254 
/*
   MatSetValuesBlocked_SeqBAIJ_Inlined - Inserts or adds one bs x bs block of
   values at block position (row,col) of a SeqBAIJ matrix (local block
   indices).  The position is located with a binary search narrowed to <= 7
   entries followed by a linear scan; if the block column is absent and
   a->nonew permits insertion, the row is reallocated/shifted to open a slot.
   v points to bs*bs scalars; row-oriented input is transposed into the
   column-major BAIJ block layout while copying.  orow/ocol are the original
   (global) block indices, used only in error messages.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];      /* block-column indices of this block row */
  ap   = aa + bs2*ai[row];  /* first numerical block of this block row */
  rmax = imax[row];         /* allocated blocks in this row */
  nrow = ailen[row];        /* occupied blocks in this row */
  value = v;
  low = 0;
  high = nrow;
  /* binary search until the window is small, then scan linearly */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) { /* block already present: add or overwrite in place */
      bap = ap +  bs2*i;
      if (roworiented) {
        /* input is row-major: walk it transposed into column-major storage */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* input already column-major: copy column by column */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new blocks are silently ignored */
  PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
  PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    /* transpose row-major input into the freshly opened block */
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow; /* record the (possibly grown) occupied-block count */
  PetscFunctionReturn(0);
}
342 
343 /*
344     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
345     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
346 */
347 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
348 {
349   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
350   const PetscScalar *value;
351   MatScalar         *barray     = baij->barray;
352   PetscBool         roworiented = baij->roworiented;
353   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
354   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
355   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
356 
357   PetscFunctionBegin;
358   if (!barray) {
359     PetscCall(PetscMalloc1(bs2,&barray));
360     baij->barray = barray;
361   }
362 
363   if (roworiented) stepval = (n-1)*bs;
364   else stepval = (m-1)*bs;
365 
366   for (i=0; i<m; i++) {
367     if (im[i] < 0) continue;
368     PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
369     if (im[i] >= rstart && im[i] < rend) {
370       row = im[i] - rstart;
371       for (j=0; j<n; j++) {
372         /* If NumCol = 1 then a copy is not required */
373         if ((roworiented) && (n == 1)) {
374           barray = (MatScalar*)v + i*bs2;
375         } else if ((!roworiented) && (m == 1)) {
376           barray = (MatScalar*)v + j*bs2;
377         } else { /* Here a copy is required */
378           if (roworiented) {
379             value = v + (i*(stepval+bs) + j)*bs;
380           } else {
381             value = v + (j*(stepval+bs) + i)*bs;
382           }
383           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
384             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
385             barray += bs;
386           }
387           barray -= bs2;
388         }
389 
390         if (in[j] >= cstart && in[j] < cend) {
391           col  = in[j] - cstart;
392           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
393         } else if (in[j] < 0) continue;
394         else PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
395         else {
396           if (mat->was_assembled) {
397             if (!baij->colmap) {
398               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
399             }
400 
401 #if defined(PETSC_USE_DEBUG)
402 #if defined(PETSC_USE_CTABLE)
403             { PetscInt data;
404               PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
405               PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
406             }
407 #else
408             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
409 #endif
410 #endif
411 #if defined(PETSC_USE_CTABLE)
412             PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
413             col  = (col - 1)/bs;
414 #else
415             col = (baij->colmap[in[j]] - 1)/bs;
416 #endif
417             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
418               PetscCall(MatDisAssemble_MPIBAIJ(mat));
419               col  =  in[j];
420             } else PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix",im[i],in[j]);
421           } else col = in[j];
422           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
423         }
424       }
425     } else {
426       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
427       if (!baij->donotstash) {
428         if (roworiented) {
429           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
430         } else {
431           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
432         }
433       }
434     }
435   }
436   PetscFunctionReturn(0);
437 }
438 
/* Multiplicative (Fibonacci) hashing: HASH_KEY is the fractional part of the
   golden ratio; HASH maps `key` into [0,size) via the fractional part of
   key*HASH_KEY.  `tmp` must be a plain PetscReal lvalue -- the macro assigns
   to it and reads it back. */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - Point-wise MatSetValues using the precomputed
   hash table (built by MatCreateHashTable_MPIBAIJ_Private()) instead of
   searching the CSR structure.  Only locations already present in the
   nonzero pattern can be set; a missing hash entry is an error.  Values for
   off-process rows go to the stash.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
  /* NOTE(review): total_ct/insert_ct are seeded from the running totals and
     then added back into them below, which appears to double-count the old
     totals; debug statistics only -- confirm intended semantics */
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned row */
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* look up the hash table: the key encodes (global block row, global block col) */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            /* linear probe forward from h1, wrapping once back to the start */
            for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == size) {
              for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
              PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address;
           HD[idx] points to the bs x bs block, stored column-major */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) { /* off-process row: stash for assembly */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}
507 
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Blocked MatSetValues using the hash table;
   im[]/in[] are global block indices and v supplies m*n blocks of bs*bs
   values.  As with the point-wise HT variant, only blocks already present in
   the nonzero pattern can be set; off-process block rows go to the block stash.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  /* NOTE(review): rstart is in block units but rend is taken from
     mat->rmap->rend (point units); for bs>1 this looks inconsistent with the
     `row < rend` ownership test below -- confirm against baij->rendbs */
  PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
  /* NOTE(review): seeded from the running totals and added back below --
     same double-counting question as in MatSetValues_MPIBAIJ_HT() */
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;

  PetscFunctionBegin;
  /* scalar distance between consecutive block rows/columns inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,im[i]);
      PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
    }
    row = im[i];
    v_t = v + i*nbs2; /* first scalar of block row i within v (row-oriented walk) */
    if (row >= rstart && row < rend) { /* locally owned block row */
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table: key encodes (global block row, global block col) */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            /* linear probe forward from h1, wrapping once back to the start */
            for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == size) {
              for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
              PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx]; /* address of the bs x bs destination block (column-major) */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* row-major input is transposed into column-major storage */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          /* column-major input: copy column by column */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else { /* off-process block row: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}
610 
/*
   MatGetValues_MPIBAIJ - Retrieves an m x n dense array of entries, all of
   which must lie in locally owned rows (off-process gets are unsupported).
   Diagonal-block columns are read from baij->A; other columns are translated
   through the colmap (global block column -> local offset into baij->B) and
   read from baij->B.  Entries outside the local sparsity pattern return 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) { /* locally owned row */
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) { /* diagonal block */
          col  = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j));
        } else { /* off-diagonal block: translate via colmap */
          if (!baij->colmap) {
            PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data));
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* data < 0 or a garray mismatch means the block column is not in
             the local off-diagonal pattern -> the entry is a structural zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs; /* colmap stores bs*(local block col)+1 */
            PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
650 
/*
   MatNorm_MPIBAIJ - Computes the Frobenius, 1- (max column sum) or infinity
   (max row sum) norm of a parallel BAIJ matrix by combining contributions
   from the diagonal (A) and off-diagonal (B) sequential blocks, then reducing
   across the communicator.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: defer entirely to the sequential implementation */
    PetscCall(MatNorm(baij->A,type,nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, MPI_SUM-reduce, then sqrt */
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      /* NOTE(review): cstart is taken from rstartbs (block row start); for a
         rectangular layout baij->cstartbs would seem to be the correct offset
         for diagonal-block columns -- confirm */
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N,&tmp2));
      /* accumulate |a_ij| per global column for the diagonal block */
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      /* off-diagonal block: garray maps local block columns to global */
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs,&sums));
      sum  = 0.0;
      /* accumulate |a_ij| row sums one block row at a time, over both blocks */
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
742 
/*
  Creates and populates the hash table.
  This table is created only once.
  If new entries need to be added to the matrix,
  the hash table has to be destroyed and
  recreated.
*/
/*
   MatCreateHashTable_MPIBAIJ_Private - Builds the hash table used by the
   *_HT MatSetValues variants: it maps key = (global block row)*Nbs +
   (global block col) + 1 to the address of the corresponding bs2-sized block
   of values inside A or B.  Collisions are resolved by linear probing with
   wrap-around.  `factor` oversizes the table relative to the number of
   nonzero blocks.  No-op if a table already exists.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0; /* probe statistics for the summary printed below */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0); /* table already built */

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table (calloc: a zero HT entry means "empty slot") */
  PetscCall(PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht));
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;      /* global block row */
      col = aj[j]+cstart;  /* global block column of diagonal block */

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      /* linear probing from h1 (with wrap-around) for an empty slot */
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2; /* address of this value block */
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]]; /* garray maps local off-diagonal block col to global */
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat,"Average Search = %5.2g,max search = %" PetscInt_FMT "\n",(!j) ? (double)0.0:(double)(((PetscReal)(ct+j))/(double)j),max));
#endif
  PetscFunctionReturn(0);
}
833 
834 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
835 {
836   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
837   PetscInt       nstash,reallocs;
838 
839   PetscFunctionBegin;
840   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
841 
842   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
843   PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs));
844   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
845   PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
846   PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs));
847   PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
848   PetscFunctionReturn(0);
849 }
850 
/* Completes assembly: drains the point and block stashes started in
   MatAssemblyBegin, assembles the two sequential blocks, handles collective
   disassembly, and (optionally) installs the hash-table insertion routines. */
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: receive messages until none remain */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break; /* no more messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* stashed values are bs2-sized blocks, hence the i*bs2 offset */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A,mode));
  PetscCall(MatAssemblyEnd(baij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of PetscBool values acts as a logical AND across all ranks */
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) {
      PetscCall(MatDisAssemble_MPIBAIJ(mat));
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: set up the scatter used by matrix-vector products */
    PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  }
  PetscCall(MatAssemblyBegin(baij->B,mode));
  PetscCall(MatAssemblyEnd(baij->B,mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    /* hash table was requested (MAT_USE_HASH_TABLE): build it and switch the
       insertion routines to their hash-table variants */
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release the work space used by MatGetRow */
  PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}
963 
964 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
965 #include <petscdraw.h>
/* ASCII/draw/socket viewing: the INFO formats print per-process summaries and
   return early; any other format gathers the whole matrix onto rank 0 and
   views it there sequentially. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank sizes, nonzero counts, memory, plus the Mvctx scatter info */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",
                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory));
      PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for this format */
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      /* rank 0 owns every row of the gathered copy */
      PetscCall(MatSetSizes(A,M,N,M,N));
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    PetscCall(MatSetType(A,MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL));
    PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    PetscCall(PetscMalloc1(bs,&rvals));

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global scalar rows of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs; /* local block column -> global scalar column */
        for (k=0; k<bs; k++) {
          /* insert one scalar column of the block per call; a advances by bs each time */
          PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs; /* garray maps compressed to global block columns */
        for (k=0; k<bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
          col++; a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat,&matname));
    if (rank == 0) {
      /* the gathered matrix lives entirely in rank 0's diagonal block */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1082 
1083 /* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Writes the matrix in PETSc binary format: header, per-row lengths, column
   indices, then values.  Rows are emitted in ascending global column order by
   splicing the off-diagonal (B) entries around the diagonal (A) entries. */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
  PetscInt       *rowlens,*colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;          /* global rows */
  N  = mat->cmap->N;          /* global columns */
  m  = mat->rmap->n;          /* local rows */
  rs = mat->rmap->rstart;     /* first owned row (scalar index) */
  cs = mat->cmap->rstart;     /* first owned column (scalar index) */
  bs = mat->rmap->bs;
  nz = bs*bs*(A->nz + B->nz); /* local scalar nonzero count */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  /* rank 0 receives the global nonzero count into header[3] before writing */
  PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));

  /* fill in and store row lengths */
  /* all bs scalar rows of block row i share the same length */
  PetscCall(PetscMalloc1(m,&rowlens));
  for (cnt=0, i=0; i<A->mbs; i++)
    for (j=0; j<bs; j++)
      rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  /* per scalar row: B columns left of the diagonal block, then the A columns,
     then the remaining B columns — yielding ascending global column order */
  PetscCall(PetscMalloc1(nz,&colidxs));
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break; /* reached columns right of the diagonal block */
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*A->j[ja] + l + cs; /* A stores local columns; shift by cs */
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  /* same traversal as above; the (k,l) scalar of block jb is read from
     a[bs*(bs*jb + l) + k], matching the index order emitted for colidxs */
  PetscCall(PetscMalloc1(nz,&matvals));
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat,viewer));
  PetscFunctionReturn(0);
}
1164 
1165 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1166 {
1167   PetscBool      iascii,isdraw,issocket,isbinary;
1168 
1169   PetscFunctionBegin;
1170   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1171   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1172   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1173   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1174   if (iascii || isdraw || issocket) {
1175     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer));
1176   } else if (isbinary) {
1177     PetscCall(MatView_MPIBAIJ_Binary(mat,viewer));
1178   }
1179   PetscFunctionReturn(0);
1180 }
1181 
/* Frees all storage owned by the MPIBAIJ implementation and removes the
   composed methods so the object can be retyped cleanly. */
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  /* communication stashes and the two sequential blocks */
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
  /* colmap is a PetscTable or a plain array depending on configuration */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); /* MatGetRow work space */
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd,baij->ht)); /* hash table, if it was built */
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* clear the type name and all composed function pointers */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiadj_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiaij_C",NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL));
  PetscFunctionReturn(0);
}
1224 
1225 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1226 {
1227   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1228   PetscInt       nt;
1229 
1230   PetscFunctionBegin;
1231   PetscCall(VecGetLocalSize(xx,&nt));
1232   PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1233   PetscCall(VecGetLocalSize(yy,&nt));
1234   PetscCheck(nt == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1235   PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1236   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
1237   PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1238   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1243 {
1244   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1245 
1246   PetscFunctionBegin;
1247   PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1248   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
1249   PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1250   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1255 {
1256   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1257 
1258   PetscFunctionBegin;
1259   /* do nondiagonal part */
1260   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1261   /* do local part */
1262   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
1263   /* add partial results together */
1264   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1265   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1270 {
1271   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1272 
1273   PetscFunctionBegin;
1274   /* do nondiagonal part */
1275   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1276   /* do local part */
1277   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1278   /* add partial results together */
1279   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1280   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 /*
1285   This only works correctly for square matrices where the subblock A->A is the
1286    diagonal block
1287 */
1288 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1289 {
1290   PetscFunctionBegin;
1291   PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1292   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ*)A->data)->A,v));
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1297 {
1298   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1299 
1300   PetscFunctionBegin;
1301   PetscCall(MatScale(a->A,aa));
1302   PetscCall(MatScale(a->B,aa));
1303   PetscFunctionReturn(0);
1304 }
1305 
/* Returns one locally-owned row with global column indices in ascending order,
   merged from the diagonal (A) and off-diagonal (B) blocks.  Output arrays
   point into per-matrix work space freed at the next assembly/destroy. */
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt    bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt    nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt    *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      /* block-row length counting both the diagonal and off-diagonal parts */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices));
  }
  lrow = row - brstart; /* convert to a local scalar row index */

  /* only request from the blocks what the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* compressed block column -> global block column */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* merge order: B entries left of the diagonal block, then all of A,
         then the remaining B entries; imark is the split point within B */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i; /* first B entry whose global block column is >= cstart */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            /* global scalar column from compressed block column + offset within block */
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested, so imark still has to be found here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}
1381 
1382 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1383 {
1384   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1385 
1386   PetscFunctionBegin;
1387   PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1388   baij->getrowactive = PETSC_FALSE;
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1393 {
1394   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1395 
1396   PetscFunctionBegin;
1397   PetscCall(MatZeroEntries(l->A));
1398   PetscCall(MatZeroEntries(l->B));
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1403 {
1404   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1405   Mat            A  = a->A,B = a->B;
1406   PetscLogDouble isend[5],irecv[5];
1407 
1408   PetscFunctionBegin;
1409   info->block_size = (PetscReal)matin->rmap->bs;
1410 
1411   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1412 
1413   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1414   isend[3] = info->memory;  isend[4] = info->mallocs;
1415 
1416   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1417 
1418   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1419   isend[3] += info->memory;  isend[4] += info->mallocs;
1420 
1421   if (flag == MAT_LOCAL) {
1422     info->nz_used      = isend[0];
1423     info->nz_allocated = isend[1];
1424     info->nz_unneeded  = isend[2];
1425     info->memory       = isend[3];
1426     info->mallocs      = isend[4];
1427   } else if (flag == MAT_GLOBAL_MAX) {
1428     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1429 
1430     info->nz_used      = irecv[0];
1431     info->nz_allocated = irecv[1];
1432     info->nz_unneeded  = irecv[2];
1433     info->memory       = irecv[3];
1434     info->mallocs      = irecv[4];
1435   } else if (flag == MAT_GLOBAL_SUM) {
1436     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1437 
1438     info->nz_used      = irecv[0];
1439     info->nz_allocated = irecv[1];
1440     info->nz_unneeded  = irecv[2];
1441     info->memory       = irecv[3];
1442     info->mallocs      = irecv[4];
1443   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1444   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1445   info->fill_ratio_needed = 0;
1446   info->factor_mallocs    = 0;
1447   PetscFunctionReturn(0);
1448 }
1449 
/* Applies a MatOption: most options are forwarded to the sequential blocks;
   a few are recorded in the parallel data structure itself. */
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* nonzero-pattern options: forward to both the diagonal and off-diagonal blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    /* also recorded locally: the assembly code saves/restores this flag
       around column-oriented block-stash insertion (see MatAssemblyEnd) */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, off-process entries are dropped instead of stashed */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    /* table is built at the next final assembly; 1.39 is the default size factor */
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  /* symmetry-related options are forwarded to the diagonal block only */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1496 
/* Forms the transpose by walking each stored block and inserting it with row
   and column roles swapped: each scalar column of a block of A becomes a
   scalar row of the result. */
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the result with swapped row/column layout */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL));
  } else {
    B = *matout; /* MAT_REUSE_MATRIX: caller supplies the destination matrix */
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  PetscCall(PetscMalloc1(bs,&rvals));

  for (i=0; i<mbs; i++) {
    /* rvals: the bs global scalar rows of block row i of A; they become
       columns of B in the transposed insertion below */
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs; /* local block column -> global scalar column */
      for (k=0; k<bs; k++) {
        /* one scalar column of the block is inserted as one row of B */
        PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES));

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs; /* compressed -> global block column */
      for (k=0; k<bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    /* MAT_INPLACE_MATRIX: replace A's contents with the transpose */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
1559 
/* mat = diag(ll) * mat * diag(rr); either vector may be NULL.  The scatter of
   rr's off-process values is overlapped with the purely-local scalings. */
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a     = baij->A,b = baij->B;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&s2,&s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr,&s1));
    PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s1));
    PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling is row-wise and purely local; apply to the off-diagonal block now */
    PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
  }
  /* scale  the diagonal block */
  PetscCall((*a->ops->diagonalscale)(a,ll,rr));

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec));
  }
  PetscFunctionReturn(0);
}
1589 
/*
   Zeros the listed global rows of A, optionally putting diag on the diagonal
   of each zeroed row. If both x and b are given, also sets b = diag*x in the
   zeroed rows so the linear system keeps the prescribed solution values there.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
  PetscInt      *lrows;
  PetscInt       r, len;
  PetscBool      cong;

  PetscFunctionBegin;
  /* get locally owned rows: maps the (possibly off-process) global indices to local indices */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x,&xx));
    PetscCall(VecGetArray(b,&bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x,&xx));
    PetscCall(VecRestoreArray(b,&bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  if ((diag != 0.0) && cong) {
    /* congruent row/column layouts: the diagonal entry lives in the local block l->A */
    PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: the diagonal entry may fall outside l->A, insert it with MatSetValues() */
    PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL));
    PetscCheck(!((Mat_SeqBAIJ*)l->A->data)->nonew,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES));
    }
    /* assembly is required because MatSetValues() may have created off-process entries */
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1647 
/*
   Zeros the listed global rows AND the corresponding columns of A, optionally
   placing diag on the diagonal. If x and b are given, b is adjusted so the
   zeroed unknowns keep the values prescribed in x.

   The diagonal block is handled by MatZeroRowsColumns() on l->A; the
   off-diagonal (BAIJ) block is patched by direct manipulation of its
   compressed block storage below.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n,p = 0;
  PetscInt          i,j,k,r,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  /* NOTE(review): rows[] is used as the (non-negative) leaf data; the OR-reduce marks owned rows */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  /* build lmask: for each ghost column, nonzero iff that column is being zeroed on its owner */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* gather ghost values of x so b can be corrected for the zeroed columns */
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    /* one scalar row inside each bs x bs block of block-row row/bs: stride bs within a block */
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          /* address of entry (i%bs, k) inside block j (blocks stored column-major) */
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          if (x) bb[i] -= aa[0]*xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1742 
1743 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1744 {
1745   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1746 
1747   PetscFunctionBegin;
1748   PetscCall(MatSetUnfactored(a->A));
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1753 
1754 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1755 {
1756   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1757   Mat            a,b,c,d;
1758   PetscBool      flg;
1759 
1760   PetscFunctionBegin;
1761   a = matA->A; b = matA->B;
1762   c = matB->A; d = matB->B;
1763 
1764   PetscCall(MatEqual(a,c,&flg));
1765   if (flg) {
1766     PetscCall(MatEqual(b,d,&flg));
1767   }
1768   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1773 {
1774   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1775   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1776 
1777   PetscFunctionBegin;
1778   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1779   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1780     PetscCall(MatCopy_Basic(A,B,str));
1781   } else {
1782     PetscCall(MatCopy(a->A,b->A,str));
1783     PetscCall(MatCopy(a->B,b->B,str));
1784   }
1785   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1786   PetscFunctionReturn(0);
1787 }
1788 
/* Default setup: preallocate with heuristic (PETSC_DEFAULT) row lengths using the block size already set on A. */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}
1795 
1796 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1797 {
1798   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1799   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1800   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1801 
1802   PetscFunctionBegin;
1803   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
1804   PetscFunctionReturn(0);
1805 }
1806 
/*
   Computes Y = a*X + Y.

   SAME_NONZERO_PATTERN: a single BLAS axpy over the raw value arrays of each
   local block. SUBSET_NONZERO_PATTERN: generic entry-wise fallback. Otherwise
   a new matrix with the union pattern is preallocated, filled, and swapped
   into Y via MatHeaderMerge().
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: the value arrays line up, so axpy them directly */
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y,a,X,str));
  } else {
    /* unknown relation between patterns: build a new matrix with the union pattern */
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
    PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
    PetscCall(MatSetType(B,MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
    /* B takes over Y's header so callers' handle to Y now refers to the new matrix */
    PetscCall(MatHeaderMerge(Y,&B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
1849 
1850 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1851 {
1852   PetscFunctionBegin;
1853   if (PetscDefined(USE_COMPLEX)) {
1854     Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data;
1855 
1856     PetscCall(MatConjugate_SeqBAIJ(a->A));
1857     PetscCall(MatConjugate_SeqBAIJ(a->B));
1858   }
1859   PetscFunctionReturn(0);
1860 }
1861 
1862 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1863 {
1864   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1865 
1866   PetscFunctionBegin;
1867   PetscCall(MatRealPart(a->A));
1868   PetscCall(MatRealPart(a->B));
1869   PetscFunctionReturn(0);
1870 }
1871 
1872 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1873 {
1874   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1875 
1876   PetscFunctionBegin;
1877   PetscCall(MatImaginaryPart(a->A));
1878   PetscCall(MatImaginaryPart(a->B));
1879   PetscFunctionReturn(0);
1880 }
1881 
/*
   Extracts the parallel submatrix mat[isrow,iscol]. The column IS is gathered
   onto every process and, on MAT_INITIAL_MATRIX, stashed on the new matrix
   (key "ISAllGather") so that a later MAT_REUSE_MATRIX call can retrieve it.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol,&csize));
  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the gathered column IS stashed by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol,&iscol_local));
  }
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat));
  if (call == MAT_INITIAL_MATRIX) {
    /* compose takes a reference, so the local destroy leaves the IS alive on *newmat */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
1902 
1903 /*
1904   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1905   in local and then by concatenating the local matrices the end result.
1906   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1907   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1908 */
/*
   Work-horse for MatCreateSubMatrix_MPIBAIJ()/MatPermute_MPIBAIJ(): extracts
   mat[isrow,iscol] as a new parallel matrix. csize is the requested local
   column size of the result (or PETSC_DECIDE). On MAT_INITIAL_MATRIX the
   intermediate sequential submatrix is stashed on the result (key "SubMatrix")
   for reuse.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  /* The compression and expansion should be avoided. It does not report
     errors and might change the indices, hence potentially buggy. */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new));
  PetscCall(ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new));

  if (call ==  MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix stashed on *newmat by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat,&bs));
  PetscCall(MatGetSize(Mreuse,&m,&n));
  /* convert point sizes to block sizes */
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible, extras on low ranks */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m+1,&dlens,m+1,&olens));
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    /* this routine serves BAIJ and SBAIJ; only the preallocation matching M's type takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens));
    PetscCall(MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens));
    PetscCall(PetscFree2(dlens,olens));
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  /* walk the sequential submatrix one block row at a time, inserting whole rows */
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz*bs*bs;
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(0);
}
2032 
2033 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2034 {
2035   MPI_Comm       comm,pcomm;
2036   PetscInt       clocal_size,nrows;
2037   const PetscInt *rows;
2038   PetscMPIInt    size;
2039   IS             crowp,lcolp;
2040 
2041   PetscFunctionBegin;
2042   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2043   /* make a collective version of 'rowp' */
2044   PetscCall(PetscObjectGetComm((PetscObject)rowp,&pcomm));
2045   if (pcomm==comm) {
2046     crowp = rowp;
2047   } else {
2048     PetscCall(ISGetSize(rowp,&nrows));
2049     PetscCall(ISGetIndices(rowp,&rows));
2050     PetscCall(ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp));
2051     PetscCall(ISRestoreIndices(rowp,&rows));
2052   }
2053   PetscCall(ISSetPermutation(crowp));
2054   /* make a local version of 'colp' */
2055   PetscCall(PetscObjectGetComm((PetscObject)colp,&pcomm));
2056   PetscCallMPI(MPI_Comm_size(pcomm,&size));
2057   if (size==1) {
2058     lcolp = colp;
2059   } else {
2060     PetscCall(ISAllGather(colp,&lcolp));
2061   }
2062   PetscCall(ISSetPermutation(lcolp));
2063   /* now we just get the submatrix */
2064   PetscCall(MatGetLocalSize(A,NULL,&clocal_size));
2065   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B));
2066   /* clean up */
2067   if (pcomm!=comm) {
2068     PetscCall(ISDestroy(&crowp));
2069   }
2070   if (size>1) {
2071     PetscCall(ISDestroy(&lcolp));
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2077 {
2078   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2079   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2080 
2081   PetscFunctionBegin;
2082   if (nghosts) *nghosts = B->nbs;
2083   if (ghosts) *ghosts = baij->garray;
2084   PetscFunctionReturn(0);
2085 }
2086 
/*
   Builds, on every process, a sequential AIJ matrix holding the complete
   (block-level) nonzero structure of the parallel matrix A. Only the
   structure is assembled; numerical values are never set. Row lengths and
   column indices are gathered with in-place MPI_Allgatherv directly into the
   internals of the new SeqAIJ matrix.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscMPIInt    size,rank,*recvcounts = NULL,*displs = NULL;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  PetscCall(PetscMalloc1(A->rmap->N/bs,&lens));
  /* local block-row lengths: diagonal block plus off-diagonal block */
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  PetscCall(PetscMalloc1(2*size,&recvcounts));
  displs    = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
  /* in-place gather: each rank contributes its own slice of lens[] */
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A)));
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  PetscCall(MatCreate(PETSC_COMM_SELF,&B));
  PetscCall(MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE));
  PetscCall(MatSetType(B,MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B,0,lens));
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  /* merge each row so that global column indices come out sorted:
     off-diagonal entries left of the diagonal block, then the diagonal
     block, then the remaining off-diagonal entries */
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  PetscCheck(cnt == sendcount,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT,sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A)));
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen,lens,A->rmap->N/bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  /* propagate symmetry information from A to the structure-only matrix */
  if (A->symmetric) {
    PetscCall(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE));
  } else if (A->hermitian) {
    PetscCall(MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE));
  } else if (A->structurally_symmetric) {
    PetscCall(MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE));
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2195 
/*
   Applies (local) SOR/Gauss-Seidel sweeps to the parallel matrix. Only the
   "local" variants are supported: each outer iteration scatters the current
   solution to the ghost vector, forms bb1 = bb - B*x (off-diagonal
   contribution moved to the right-hand side), and then runs the sequential
   SOR kernel on the diagonal block.
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  Vec            bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever an outer iteration uses a nonzero initial guess */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration: x is zero so B*x vanishes; sweep on bb directly */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(0);
}
2265 
2266 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions)
2267 {
2268   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2269   PetscInt       m,N,i,*garray = aij->garray;
2270   PetscInt       ib,jb,bs = A->rmap->bs;
2271   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2272   MatScalar      *a_val = a_aij->a;
2273   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2274   MatScalar      *b_val = b_aij->a;
2275   PetscReal      *work;
2276 
2277   PetscFunctionBegin;
2278   PetscCall(MatGetSize(A,&m,&N));
2279   PetscCall(PetscCalloc1(N,&work));
2280   if (type == NORM_2) {
2281     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2282       for (jb=0; jb<bs; jb++) {
2283         for (ib=0; ib<bs; ib++) {
2284           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2285           a_val++;
2286         }
2287       }
2288     }
2289     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2290       for (jb=0; jb<bs; jb++) {
2291         for (ib=0; ib<bs; ib++) {
2292           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2293           b_val++;
2294         }
2295       }
2296     }
2297   } else if (type == NORM_1) {
2298     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2299       for (jb=0; jb<bs; jb++) {
2300         for (ib=0; ib<bs; ib++) {
2301           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2302           a_val++;
2303         }
2304       }
2305     }
2306     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2307       for (jb=0; jb<bs; jb++) {
2308        for (ib=0; ib<bs; ib++) {
2309           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2310           b_val++;
2311         }
2312       }
2313     }
2314   } else if (type == NORM_INFINITY) {
2315     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2316       for (jb=0; jb<bs; jb++) {
2317         for (ib=0; ib<bs; ib++) {
2318           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2319           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2320           a_val++;
2321         }
2322       }
2323     }
2324     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2325       for (jb=0; jb<bs; jb++) {
2326         for (ib=0; ib<bs; ib++) {
2327           int col = garray[b_aij->j[i]] * bs + jb;
2328           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2329           b_val++;
2330         }
2331       }
2332     }
2333   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2334     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2335       for (jb=0; jb<bs; jb++) {
2336         for (ib=0; ib<bs; ib++) {
2337           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2338           a_val++;
2339         }
2340       }
2341     }
2342     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2343       for (jb=0; jb<bs; jb++) {
2344        for (ib=0; ib<bs; ib++) {
2345           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2346           b_val++;
2347         }
2348       }
2349     }
2350   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2351     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2352       for (jb=0; jb<bs; jb++) {
2353         for (ib=0; ib<bs; ib++) {
2354           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2355           a_val++;
2356         }
2357       }
2358     }
2359     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2360       for (jb=0; jb<bs; jb++) {
2361        for (ib=0; ib<bs; ib++) {
2362           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2363           b_val++;
2364         }
2365       }
2366     }
2367   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
2368   if (type == NORM_INFINITY) {
2369     PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
2370   } else {
2371     PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
2372   }
2373   PetscCall(PetscFree(work));
2374   if (type == NORM_2) {
2375     for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2376   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2377     for (i=0; i<N; i++) reductions[i] /= m;
2378   }
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2383 {
2384   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2385 
2386   PetscFunctionBegin;
2387   PetscCall(MatInvertBlockDiagonal(a->A,values));
2388   A->factorerrortype             = a->A->factorerrortype;
2389   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2390   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2395 {
2396   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2397   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2398 
2399   PetscFunctionBegin;
2400   if (!Y->preallocated) {
2401     PetscCall(MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL));
2402   } else if (!aij->nz) {
2403     PetscInt nonew = aij->nonew;
2404     PetscCall(MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL));
2405     aij->nonew = nonew;
2406   }
2407   PetscCall(MatShift_Basic(Y,a));
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2412 {
2413   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2414 
2415   PetscFunctionBegin;
2416   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2417   PetscCall(MatMissingDiagonal(a->A,missing,d));
2418   if (d) {
2419     PetscInt rstart;
2420     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2421     *d += rstart/A->rmap->bs;
2422 
2423   }
2424   PetscFunctionReturn(0);
2425 }
2426 
2427 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2428 {
2429   PetscFunctionBegin;
2430   *a = ((Mat_MPIBAIJ*)A->data)->A;
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 /* -------------------------------------------------------------------*/
2435 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2436                                        MatGetRow_MPIBAIJ,
2437                                        MatRestoreRow_MPIBAIJ,
2438                                        MatMult_MPIBAIJ,
2439                                 /* 4*/ MatMultAdd_MPIBAIJ,
2440                                        MatMultTranspose_MPIBAIJ,
2441                                        MatMultTransposeAdd_MPIBAIJ,
2442                                        NULL,
2443                                        NULL,
2444                                        NULL,
2445                                 /*10*/ NULL,
2446                                        NULL,
2447                                        NULL,
2448                                        MatSOR_MPIBAIJ,
2449                                        MatTranspose_MPIBAIJ,
2450                                 /*15*/ MatGetInfo_MPIBAIJ,
2451                                        MatEqual_MPIBAIJ,
2452                                        MatGetDiagonal_MPIBAIJ,
2453                                        MatDiagonalScale_MPIBAIJ,
2454                                        MatNorm_MPIBAIJ,
2455                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2456                                        MatAssemblyEnd_MPIBAIJ,
2457                                        MatSetOption_MPIBAIJ,
2458                                        MatZeroEntries_MPIBAIJ,
2459                                 /*24*/ MatZeroRows_MPIBAIJ,
2460                                        NULL,
2461                                        NULL,
2462                                        NULL,
2463                                        NULL,
2464                                 /*29*/ MatSetUp_MPIBAIJ,
2465                                        NULL,
2466                                        NULL,
2467                                        MatGetDiagonalBlock_MPIBAIJ,
2468                                        NULL,
2469                                 /*34*/ MatDuplicate_MPIBAIJ,
2470                                        NULL,
2471                                        NULL,
2472                                        NULL,
2473                                        NULL,
2474                                 /*39*/ MatAXPY_MPIBAIJ,
2475                                        MatCreateSubMatrices_MPIBAIJ,
2476                                        MatIncreaseOverlap_MPIBAIJ,
2477                                        MatGetValues_MPIBAIJ,
2478                                        MatCopy_MPIBAIJ,
2479                                 /*44*/ NULL,
2480                                        MatScale_MPIBAIJ,
2481                                        MatShift_MPIBAIJ,
2482                                        NULL,
2483                                        MatZeroRowsColumns_MPIBAIJ,
2484                                 /*49*/ NULL,
2485                                        NULL,
2486                                        NULL,
2487                                        NULL,
2488                                        NULL,
2489                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2490                                        NULL,
2491                                        MatSetUnfactored_MPIBAIJ,
2492                                        MatPermute_MPIBAIJ,
2493                                        MatSetValuesBlocked_MPIBAIJ,
2494                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2495                                        MatDestroy_MPIBAIJ,
2496                                        MatView_MPIBAIJ,
2497                                        NULL,
2498                                        NULL,
2499                                 /*64*/ NULL,
2500                                        NULL,
2501                                        NULL,
2502                                        NULL,
2503                                        NULL,
2504                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2505                                        NULL,
2506                                        NULL,
2507                                        NULL,
2508                                        NULL,
2509                                 /*74*/ NULL,
2510                                        MatFDColoringApply_BAIJ,
2511                                        NULL,
2512                                        NULL,
2513                                        NULL,
2514                                 /*79*/ NULL,
2515                                        NULL,
2516                                        NULL,
2517                                        NULL,
2518                                        MatLoad_MPIBAIJ,
2519                                 /*84*/ NULL,
2520                                        NULL,
2521                                        NULL,
2522                                        NULL,
2523                                        NULL,
2524                                 /*89*/ NULL,
2525                                        NULL,
2526                                        NULL,
2527                                        NULL,
2528                                        NULL,
2529                                 /*94*/ NULL,
2530                                        NULL,
2531                                        NULL,
2532                                        NULL,
2533                                        NULL,
2534                                 /*99*/ NULL,
2535                                        NULL,
2536                                        NULL,
2537                                        MatConjugate_MPIBAIJ,
2538                                        NULL,
2539                                 /*104*/NULL,
2540                                        MatRealPart_MPIBAIJ,
2541                                        MatImaginaryPart_MPIBAIJ,
2542                                        NULL,
2543                                        NULL,
2544                                 /*109*/NULL,
2545                                        NULL,
2546                                        NULL,
2547                                        NULL,
2548                                        MatMissingDiagonal_MPIBAIJ,
2549                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2550                                        NULL,
2551                                        MatGetGhosts_MPIBAIJ,
2552                                        NULL,
2553                                        NULL,
2554                                 /*119*/NULL,
2555                                        NULL,
2556                                        NULL,
2557                                        NULL,
2558                                        MatGetMultiProcBlock_MPIBAIJ,
2559                                 /*124*/NULL,
2560                                        MatGetColumnReductions_MPIBAIJ,
2561                                        MatInvertBlockDiagonal_MPIBAIJ,
2562                                        NULL,
2563                                        NULL,
2564                                /*129*/ NULL,
2565                                        NULL,
2566                                        NULL,
2567                                        NULL,
2568                                        NULL,
2569                                /*134*/ NULL,
2570                                        NULL,
2571                                        NULL,
2572                                        NULL,
2573                                        NULL,
2574                                /*139*/ MatSetBlockSizes_Default,
2575                                        NULL,
2576                                        NULL,
2577                                        MatFDColoringSetUp_MPIXAIJ,
2578                                        NULL,
2579                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2580                                        NULL,
2581                                        NULL,
2582                                        NULL
2583 };
2584 
2585 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2586 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2587 
2588 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2589 {
2590   PetscInt       m,rstart,cstart,cend;
2591   PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
2592   const PetscInt *JJ    =NULL;
2593   PetscScalar    *values=NULL;
2594   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2595   PetscBool      nooffprocentries;
2596 
2597   PetscFunctionBegin;
2598   PetscCall(PetscLayoutSetBlockSize(B->rmap,bs));
2599   PetscCall(PetscLayoutSetBlockSize(B->cmap,bs));
2600   PetscCall(PetscLayoutSetUp(B->rmap));
2601   PetscCall(PetscLayoutSetUp(B->cmap));
2602   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
2603   m      = B->rmap->n/bs;
2604   rstart = B->rmap->rstart/bs;
2605   cstart = B->cmap->rstart/bs;
2606   cend   = B->cmap->rend/bs;
2607 
2608   PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
2609   PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz));
2610   for (i=0; i<m; i++) {
2611     nz = ii[i+1] - ii[i];
2612     PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
2613     nz_max = PetscMax(nz_max,nz);
2614     dlen   = 0;
2615     olen   = 0;
2616     JJ     = jj + ii[i];
2617     for (j=0; j<nz; j++) {
2618       if (*JJ < cstart || *JJ >= cend) olen++;
2619       else dlen++;
2620       JJ++;
2621     }
2622     d_nnz[i] = dlen;
2623     o_nnz[i] = olen;
2624   }
2625   PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz));
2626   PetscCall(PetscFree2(d_nnz,o_nnz));
2627 
2628   values = (PetscScalar*)V;
2629   if (!values) {
2630     PetscCall(PetscCalloc1(bs*bs*nz_max,&values));
2631   }
2632   for (i=0; i<m; i++) {
2633     PetscInt          row    = i + rstart;
2634     PetscInt          ncols  = ii[i+1] - ii[i];
2635     const PetscInt    *icols = jj + ii[i];
2636     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2637       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2638       PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES));
2639     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2640       PetscInt j;
2641       for (j=0; j<ncols; j++) {
2642         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2643         PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES));
2644       }
2645     }
2646   }
2647 
2648   if (!V) PetscCall(PetscFree(values));
2649   nooffprocentries    = B->nooffprocentries;
2650   B->nooffprocentries = PETSC_TRUE;
2651   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
2652   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
2653   B->nooffprocentries = nooffprocentries;
2654 
2655   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
2656   PetscFunctionReturn(0);
2657 }
2658 
2659 /*@C
2660    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2661 
2662    Collective
2663 
2664    Input Parameters:
2665 +  B - the matrix
2666 .  bs - the block size
2667 .  i - the indices into j for the start of each local row (starts with zero)
2668 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2669 -  v - optional values in the matrix
2670 
2671    Level: advanced
2672 
2673    Notes:
2674     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2675    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2676    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2677    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2678    block column and the second index is over columns within a block.
2679 
2680    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2681 
2682 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
2683 @*/
/* Public entry point: validates arguments, then dispatches to the
   type-specific "MatMPIBAIJSetPreallocationCSR_C" method if the matrix
   type has composed one (PetscTryMethod is a no-op otherwise). */
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* bs must be the same on every rank of B's communicator */
  PetscValidLogicalCollectiveInt(B,bs,2);
  PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
  PetscFunctionReturn(0);
}
2693 
2694 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2695 {
2696   Mat_MPIBAIJ    *b;
2697   PetscInt       i;
2698   PetscMPIInt    size;
2699 
2700   PetscFunctionBegin;
2701   PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
2702   PetscCall(PetscLayoutSetUp(B->rmap));
2703   PetscCall(PetscLayoutSetUp(B->cmap));
2704   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
2705 
2706   if (d_nnz) {
2707     for (i=0; i<B->rmap->n/bs; i++) {
2708       PetscCheck(d_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,d_nnz[i]);
2709     }
2710   }
2711   if (o_nnz) {
2712     for (i=0; i<B->rmap->n/bs; i++) {
2713       PetscCheck(o_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,o_nnz[i]);
2714     }
2715   }
2716 
2717   b      = (Mat_MPIBAIJ*)B->data;
2718   b->bs2 = bs*bs;
2719   b->mbs = B->rmap->n/bs;
2720   b->nbs = B->cmap->n/bs;
2721   b->Mbs = B->rmap->N/bs;
2722   b->Nbs = B->cmap->N/bs;
2723 
2724   for (i=0; i<=b->size; i++) {
2725     b->rangebs[i] = B->rmap->range[i]/bs;
2726   }
2727   b->rstartbs = B->rmap->rstart/bs;
2728   b->rendbs   = B->rmap->rend/bs;
2729   b->cstartbs = B->cmap->rstart/bs;
2730   b->cendbs   = B->cmap->rend/bs;
2731 
2732 #if defined(PETSC_USE_CTABLE)
2733   PetscCall(PetscTableDestroy(&b->colmap));
2734 #else
2735   PetscCall(PetscFree(b->colmap));
2736 #endif
2737   PetscCall(PetscFree(b->garray));
2738   PetscCall(VecDestroy(&b->lvec));
2739   PetscCall(VecScatterDestroy(&b->Mvctx));
2740 
2741   /* Because the B will have been resized we simply destroy it and create a new one each time */
2742   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
2743   PetscCall(MatDestroy(&b->B));
2744   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
2745   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
2746   PetscCall(MatSetType(b->B,MATSEQBAIJ));
2747   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
2748 
2749   if (!B->preallocated) {
2750     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
2751     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
2752     PetscCall(MatSetType(b->A,MATSEQBAIJ));
2753     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
2754     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash));
2755   }
2756 
2757   PetscCall(MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz));
2758   PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz));
2759   B->preallocated  = PETSC_TRUE;
2760   B->was_assembled = PETSC_FALSE;
2761   B->assembled     = PETSC_FALSE;
2762   PetscFunctionReturn(0);
2763 }
2764 
2765 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2766 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2767 
/* Convert a MATMPIBAIJ matrix to MATMPIADJ (block connectivity graph, no values).
   Works in block units: each bs x bs block becomes one graph edge; the diagonal
   block of each block row is excluded.  The merged column list for each row is
   built in ascending order: off-diagonal columns before the owned range, then
   the diagonal-part columns, then the remaining off-diagonal columns.
   NOTE(review): index j intentionally carries over from the first off-diagonal
   loop into the final loop of each row — do not reorder these loops. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* first pass: row pointers; each row gets diag-part + offdiag-part counts */
  PetscCall(PetscMalloc1(M+1,&ii));
  ii[0] = 0;
  for (i=0; i<M; i++) {
    PetscCheck((id[i+1] - id[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,id[i],id[i+1]);
    PetscCheck((io[i+1] - io[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from the count if this block row stores its diagonal block
       (the adjacency graph excludes self-edges) */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  /* second pass: column indices, merged in ascending global block-column order */
  PetscCall(PetscMalloc1(ii[M],&jj));
  cnt  = 0;
  for (i=0; i<M; i++) {
    /* off-diagonal columns below the owned range (garray maps local offdiag
       column to global block column) */
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* diagonal-part columns, skipping the diagonal block itself */
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    /* remaining off-diagonal columns (j continues from the loop above) */
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  /* MatCreateMPIAdj takes ownership of ii and jj */
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj));
  PetscFunctionReturn(0);
}
2806 
2807 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2808 
2809 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2810 
2811 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2812 {
2813   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2814   Mat_MPIAIJ  *b;
2815   Mat         B;
2816 
2817   PetscFunctionBegin;
2818   PetscCheck(A->assembled,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2819 
2820   if (reuse == MAT_REUSE_MATRIX) {
2821     B = *newmat;
2822   } else {
2823     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
2824     PetscCall(MatSetType(B,MATMPIAIJ));
2825     PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2826     PetscCall(MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs));
2827     PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
2828     PetscCall(MatMPIAIJSetPreallocation(B,0,NULL,0,NULL));
2829   }
2830   b = (Mat_MPIAIJ*) B->data;
2831 
2832   if (reuse == MAT_REUSE_MATRIX) {
2833     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2834     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2835   } else {
2836     PetscCall(MatDestroy(&b->A));
2837     PetscCall(MatDestroy(&b->B));
2838     PetscCall(MatDisAssemble_MPIBAIJ(A));
2839     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2840     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2841     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
2842     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
2843   }
2844   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
2845   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
2846 
2847   if (reuse == MAT_INPLACE_MATRIX) {
2848     PetscCall(MatHeaderReplace(A,&B));
2849   } else {
2850    *newmat = B;
2851   }
2852   PetscFunctionReturn(0);
2853 }
2854 
2855 /*MC
2856    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2857 
2858    Options Database Keys:
2859 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2860 . -mat_block_size <bs> - set the blocksize used to store the matrix
2861 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2862 - -mat_use_hash_table <fact> - set hash table factor
2863 
2864    Level: beginner
2865 
2866    Notes:
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
2869 
2870 .seealso: `MatCreateBAIJ`
2871 M*/
2872 
2873 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2874 
/* Constructor for MATMPIBAIJ: allocates and zero-initializes the per-type data
   structure, installs the virtual function table, registers the composed
   conversion/preallocation methods, and processes the hash-table option. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(B,&b));
  B->data = (void*)b;

  /* install the positional operation table defined above */
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size+1,&b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;   /* off-diagonal global->local column map; built at assembly */
  b->garray      = NULL;   /* off-diagonal local->global column map; built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register composed methods looked up by name elsewhere (e.g. MatConvert,
     MatMPIBAIJSetPreallocation); the "_C" names are the dispatch keys */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ));

  /* optional hash-table-based assembly, enabled from the options database */
  PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL));
    if (fact <= 1.0) fact = 1.39;   /* factors <= 1 are invalid; fall back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B,fact));
    PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}
2954 
2955 /*MC
2956    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2957 
2958    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2959    and MATMPIBAIJ otherwise.
2960 
2961    Options Database Keys:
2962 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2963 
2964   Level: beginner
2965 
2966 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2967 M*/
2968 
2969 /*@C
2970    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2971    (block compressed row).  For good matrix assembly performance
2972    the user should preallocate the matrix storage by setting the parameters
2973    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2974    performance can be increased by more than a factor of 50.
2975 
2976    Collective on Mat
2977 
2978    Input Parameters:
2979 +  B - the matrix
2980 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2981           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2982 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2983            submatrix  (same for all local rows)
2984 .  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
2986            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
2987            set it even if it is zero.
2988 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2989            submatrix (same for all local rows).
2990 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2991            off-diagonal portion of the local submatrix (possibly different for
2992            each block row) or NULL.
2993 
2994    If the *_nnz parameter is given then the *_nz parameter is ignored
2995 
2996    Options Database Keys:
2997 +   -mat_block_size - size of the blocks to use
2998 -   -mat_use_hash_table <fact> - set hash table factor
2999 
3000    Notes:
3001    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3003 
3004    Storage Information:
3005    For a square global matrix we define each processor's diagonal portion
3006    to be its local rows and the corresponding columns (a square submatrix);
3007    each processor's off-diagonal portion encompasses the remainder of the
3008    local matrix (a rectangular submatrix).
3009 
3010    The user can specify preallocated storage for the diagonal part of
3011    the local submatrix with either d_nz or d_nnz (not both).  Set
3012    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3013    memory allocation.  Likewise, specify preallocated storage for the
3014    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3015 
3016    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3017    the figure below we depict these three local rows and all columns (0-11).
3018 
3019 .vb
3020            0 1 2 3 4 5 6 7 8 9 10 11
3021           --------------------------
3022    row 3  |o o o d d d o o o o  o  o
3023    row 4  |o o o d d d o o o o  o  o
3024    row 5  |o o o d d d o o o o  o  o
3025           --------------------------
3026 .ve
3027 
3028    Thus, any entries in the d locations are stored in the d (diagonal)
3029    submatrix, and any entries in the o locations are stored in the
3030    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3031    stored simply in the MATSEQBAIJ format for compressed row storage.
3032 
3033    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3034    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3035    In general, for PDE problems in which most nonzeros are near the diagonal,
3036    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3037    or you will get TERRIBLE performance; see the users' manual chapter on
3038    matrices.
3039 
3040    You can call MatGetInfo() to get information on how effective the preallocation was;
3041    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3042    You can also run with the option -info and look for messages with the string
3043    malloc in them to see if additional memory allocation was needed.
3044 
3045    Level: intermediate
3046 
3047 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3048 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* bs must be the same on every process sharing B */
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* Dispatch to the type-specific implementation registered under
     "MatMPIBAIJSetPreallocation_C"; PetscTryMethod() is a no-op when the
     matrix type does not provide that method, so this is safe to call on
     any matrix type. */
  PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}
3058 
3059 /*@C
3060    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3061    (block compressed row).  For good matrix assembly performance
3062    the user should preallocate the matrix storage by setting the parameters
3063    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3064    performance can be increased by more than a factor of 50.
3065 
3066    Collective
3067 
3068    Input Parameters:
3069 +  comm - MPI communicator
3070 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3071           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3072 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3073            This value should be the same as the local size used in creating the
3074            y vector for the matrix-vector product y = Ax.
3075 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3076            This value should be the same as the local size used in creating the
3077            x vector for the matrix-vector product y = Ax.
3078 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3079 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3080 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3081            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3086 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3087            submatrix (same for all local rows).
3088 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3089            off-diagonal portion of the local submatrix (possibly different for
3090            each block row) or NULL.
3091 
3092    Output Parameter:
3093 .  A - the matrix
3094 
3095    Options Database Keys:
3096 +   -mat_block_size - size of the blocks to use
3097 -   -mat_use_hash_table <fact> - set hash table factor
3098 
3099    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3100    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3101    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3102 
3103    Notes:
3104    If the *_nnz parameter is given then the *_nz parameter is ignored
3105 
   A nonzero block is any block that has 1 or more nonzeros in it
3107 
3108    The user MUST specify either the local or global matrix dimensions
3109    (possibly both).
3110 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3113 
3114    Storage Information:
3115    For a square global matrix we define each processor's diagonal portion
3116    to be its local rows and the corresponding columns (a square submatrix);
3117    each processor's off-diagonal portion encompasses the remainder of the
3118    local matrix (a rectangular submatrix).
3119 
3120    The user can specify preallocated storage for the diagonal part of
3121    the local submatrix with either d_nz or d_nnz (not both).  Set
3122    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3123    memory allocation.  Likewise, specify preallocated storage for the
3124    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3125 
3126    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3127    the figure below we depict these three local rows and all columns (0-11).
3128 
3129 .vb
3130            0 1 2 3 4 5 6 7 8 9 10 11
3131           --------------------------
3132    row 3  |o o o d d d o o o o  o  o
3133    row 4  |o o o d d d o o o o  o  o
3134    row 5  |o o o d d d o o o o  o  o
3135           --------------------------
3136 .ve
3137 
3138    Thus, any entries in the d locations are stored in the d (diagonal)
3139    submatrix, and any entries in the o locations are stored in the
3140    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3141    stored simply in the MATSEQBAIJ format for compressed row storage.
3142 
3143    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3144    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3145    In general, for PDE problems in which most nonzeros are near the diagonal,
3146    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3147    or you will get TERRIBLE performance; see the users' manual chapter on
3148    matrices.
3149 
3150    Level: intermediate
3151 
3152 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3153 @*/
3154 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3155 {
3156   PetscMPIInt    size;
3157 
3158   PetscFunctionBegin;
3159   PetscCall(MatCreate(comm,A));
3160   PetscCall(MatSetSizes(*A,m,n,M,N));
3161   PetscCallMPI(MPI_Comm_size(comm,&size));
3162   if (size > 1) {
3163     PetscCall(MatSetType(*A,MATMPIBAIJ));
3164     PetscCall(MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz));
3165   } else {
3166     PetscCall(MatSetType(*A,MATSEQBAIJ));
3167     PetscCall(MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz));
3168   }
3169   PetscFunctionReturn(0);
3170 }
3171 
/* Duplicate an MPIBAIJ matrix: create a new matrix of the same type and sizes,
   copy all the structural metadata (block sizes/counts, ownership ranges,
   column map, garray, communication scatter), and duplicate the diagonal (A)
   and off-diagonal (B) sequential blocks; cpvalues controls whether the
   numerical values are copied (passed through to MatDuplicate() of A and B). */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));

  /* the duplicate is marked assembled/preallocated since its parts are copied fully formed */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the block-size bookkeeping (bs2 = bs*bs; mbs/nbs local, Mbs/Nbs global block counts) */
  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share the row/column layouts by reference rather than rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-use scratch state is reset, not copied */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration (flag, factor) but start with an
     empty table and zeroed counters */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1));
  /* colmap maps global block columns to local columns of B; copy whichever
     representation (ctable or dense array) is in use */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs));
#endif
  } else a->colmap = NULL;

  /* garray lists the global block columns present in B (length = number of
     block columns of B); copy it only when it exists and is non-empty */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* fresh stash for off-process block insertions; duplicate/copy the
     communication vector and scatter used by MatMult */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));

  /* duplicate the two sequential blocks; cpvalues decides value copying */
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
3253 
/* Used for both MPIBAIJ and MPISBAIJ matrices.
   Loads a matrix stored in PETSc binary (AIJ-style CSR) format: reads the
   header and row/column/value data collectively, computes a block-row
   preallocation, and inserts the values row by row. */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
  PetscInt       *rowidxs,*colidxs,rs,cs,ce;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header: classid, global rows, global cols, total nonzeros */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  /* a negative nz marks a special on-disk format this loader cannot handle */
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set local sizes if not set already (for square matrices, mirror the
     local size from the other dimension if only one was given) */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
  PetscCall(MatGetBlockSize(mat,&bs));
  PetscCall(MatGetLocalSize(mat,&m,&n));
  PetscCall(PetscLayoutGetRange(mat->rmap,&rs,NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap,&cs,&ce));
  mbs = m/bs; nbs = n/bs;  /* local block row/column counts */

  /* read in row lengths and build row indices (prefix sum turns per-row
     counts into CSR row offsets, local to this process) */
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* sanity check: the sum of all row lengths must equal the header's nz */
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);

  /* read in column indices and matrix values for the locally owned rows */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));

  { /* preallocate matrix storage: count, for each block row, the distinct
       diagonal and off-diagonal block columns touched by its point entries */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij,done;
    PetscInt   *d_nnz,*o_nnz;

    PetscCall(PetscBTCreate(nbs,&bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij));
    for (i=0; i<mbs; i++) {
      PetscCall(PetscBTMemzero(nbs,bt));
      PetscCall(PetscHSetIClear(ht));
      for (k=0; k<bs; k++) { /* scan all bs point rows of block row i */
        PetscInt row = bs*i + k;
        for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
          PetscInt col = colidxs[j];
          /* for SBAIJ only the upper triangle (col >= row) is stored */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              /* diagonal portion: count each local block column once */
              if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
            } else {
              /* off-diagonal portion: count each global block column once */
              PetscCall(PetscHSetIQueryAdd(ht,col/bs,&done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* both calls are made since this routine serves BAIJ and SBAIJ; each is
       presumably a no-op for the other matrix type (method dispatch) */
    PetscCall(MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree2(d_nnz,o_nnz));
  }

  /* store matrix values one (point) row at a time via the type's setvalues */
  for (i=0; i<m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
    PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
3351 
3352 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3353 {
3354   PetscBool isbinary;
3355 
3356   PetscFunctionBegin;
3357   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
3358   PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3359   PetscCall(MatLoad_MPIBAIJ_Binary(mat,viewer));
3360   PetscFunctionReturn(0);
3361 }
3362 
3363 /*@
3364    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3365 
3366    Input Parameters:
3367 +  mat  - the matrix
3368 -  fact - factor
3369 
3370    Not Collective, each process can use a different factor
3371 
3372    Level: advanced
3373 
3374   Notes:
3375    This can also be set by the command line option: -mat_use_hash_table <fact>
3376 
3377 .seealso: `MatSetOption()`
3378 @*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation (see
     MatSetHashTableFactor_MPIBAIJ below); PetscTryMethod() is a no-op for
     matrix types that do not register "MatSetHashTableFactor_C". */
  PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));
  PetscFunctionReturn(0);
}
3385 
3386 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3387 {
3388   Mat_MPIBAIJ *baij;
3389 
3390   PetscFunctionBegin;
3391   baij          = (Mat_MPIBAIJ*)mat->data;
3392   baij->ht_fact = fact;
3393   PetscFunctionReturn(0);
3394 }
3395 
3396 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3397 {
3398   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3399   PetscBool    flg;
3400 
3401   PetscFunctionBegin;
3402   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg));
3403   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3404   if (Ad)     *Ad     = a->A;
3405   if (Ao)     *Ao     = a->B;
3406   if (colmap) *colmap = a->garray;
3407   PetscFunctionReturn(0);
3408 }
3409 
3410 /*
3411     Special version for direct calls from Fortran (to eliminate two function call overheads
3412 */
3413 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3414 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3415 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3416 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3417 #endif
3418 
3419 /*@C
3420   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3421 
3422   Collective on Mat
3423 
3424   Input Parameters:
3425 + mat - the matrix
3426 . min - number of input rows
3427 . im - input rows
3428 . nin - number of input columns
3429 . in - input columns
3430 . v - numerical values input
3431 - addvin - INSERT_VALUES or ADD_VALUES
3432 
3433   Notes:
3434     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3435 
3436   Level: advanced
3437 
3438 .seealso: `MatSetValuesBlocked()`
3439 @*/
3440 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3441 {
3442   /* convert input arguments to C version */
3443   Mat        mat  = *matin;
3444   PetscInt   m    = *min, n = *nin;
3445   InsertMode addv = *addvin;
3446 
3447   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3448   const MatScalar *value;
3449   MatScalar       *barray     = baij->barray;
3450   PetscBool       roworiented = baij->roworiented;
3451   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3452   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3453   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3454 
3455   PetscFunctionBegin;
3456   /* tasks normally handled by MatSetValuesBlocked() */
3457   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3458   else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3459   PetscCheck(!mat->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3460   if (mat->assembled) {
3461     mat->was_assembled = PETSC_TRUE;
3462     mat->assembled     = PETSC_FALSE;
3463   }
3464   PetscCall(PetscLogEventBegin(MAT_SetValues,mat,0,0,0));
3465 
3466   if (!barray) {
3467     PetscCall(PetscMalloc1(bs2,&barray));
3468     baij->barray = barray;
3469   }
3470 
3471   if (roworiented) stepval = (n-1)*bs;
3472   else stepval = (m-1)*bs;
3473 
3474   for (i=0; i<m; i++) {
3475     if (im[i] < 0) continue;
3476     PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
3477     if (im[i] >= rstart && im[i] < rend) {
3478       row = im[i] - rstart;
3479       for (j=0; j<n; j++) {
3480         /* If NumCol = 1 then a copy is not required */
3481         if ((roworiented) && (n == 1)) {
3482           barray = (MatScalar*)v + i*bs2;
3483         } else if ((!roworiented) && (m == 1)) {
3484           barray = (MatScalar*)v + j*bs2;
3485         } else { /* Here a copy is required */
3486           if (roworiented) {
3487             value = v + i*(stepval+bs)*bs + j*bs;
3488           } else {
3489             value = v + j*(stepval+bs)*bs + i*bs;
3490           }
3491           for (ii=0; ii<bs; ii++,value+=stepval) {
3492             for (jj=0; jj<bs; jj++) {
3493               *barray++ = *value++;
3494             }
3495           }
3496           barray -=bs2;
3497         }
3498 
3499         if (in[j] >= cstart && in[j] < cend) {
3500           col  = in[j] - cstart;
3501           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
3502         } else if (in[j] < 0) continue;
3503         else PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
3504         else {
3505           if (mat->was_assembled) {
3506             if (!baij->colmap) {
3507               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3508             }
3509 
3510 #if defined(PETSC_USE_DEBUG)
3511 #if defined(PETSC_USE_CTABLE)
3512             { PetscInt data;
3513               PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
3514               PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3515             }
3516 #else
3517             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3518 #endif
3519 #endif
3520 #if defined(PETSC_USE_CTABLE)
3521             PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
3522             col  = (col - 1)/bs;
3523 #else
3524             col = (baij->colmap[in[j]] - 1)/bs;
3525 #endif
3526             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3527               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3528               col  =  in[j];
3529             }
3530           } else col = in[j];
3531           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
3532         }
3533       }
3534     } else {
3535       if (!baij->donotstash) {
3536         if (roworiented) {
3537           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
3538         } else {
3539           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
3540         }
3541       }
3542     }
3543   }
3544 
3545   /* task normally handled by MatSetValuesBlocked() */
3546   PetscCall(PetscLogEventEnd(MAT_SetValues,mat,0,0,0));
3547   PetscFunctionReturn(0);
3548 }
3549 
3550 /*@
3551      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3552          CSR format the local rows.
3553 
3554    Collective
3555 
3556    Input Parameters:
3557 +  comm - MPI communicator
3558 .  bs - the block size, only a block size of 1 is supported
3559 .  m - number of local rows (Cannot be PETSC_DECIDE)
3560 .  n - This value should be the same as the local size used in creating the
3561        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3562        calculated if N is given) For square matrices n is almost always m.
3563 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3564 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3565 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3566 .   j - column indices
3567 -   a - matrix values
3568 
3569    Output Parameter:
3570 .   mat - the matrix
3571 
3572    Level: intermediate
3573 
3574    Notes:
3575        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3576      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3577      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3578 
3579      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3580      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3581      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3582      with column-major ordering within blocks.
3583 
3584        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3585 
3586 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3587           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3588 @*/
PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  /* i[] are CSR row offsets and must be zero-based; m must be a concrete
     local row count (PETSC_DECIDE is not supported here) */
  PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat,bs));
  PetscCall(MatSetUp(*mat));
  /* the input blocks are stored column-major (Fortran order), so insert with
     column orientation, then restore the default row orientation */
  PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a));
  PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE));
  PetscFunctionReturn(0);
}
3604 
3605 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3606 {
3607   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3608   PetscInt       *indx;
3609   PetscScalar    *values;
3610 
3611   PetscFunctionBegin;
3612   PetscCall(MatGetSize(inmat,&m,&N));
3613   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3614     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3615     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3616     PetscInt       *bindx,rmax=a->rmax,j;
3617     PetscMPIInt    rank,size;
3618 
3619     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
3620     mbs = m/bs; Nbs = N/cbs;
3621     if (n == PETSC_DECIDE) {
3622       PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N));
3623     }
3624     nbs = n/cbs;
3625 
3626     PetscCall(PetscMalloc1(rmax,&bindx));
3627     MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */
3628 
3629     PetscCallMPI(MPI_Comm_rank(comm,&rank));
3630     PetscCallMPI(MPI_Comm_rank(comm,&size));
3631     if (rank == size-1) {
3632       /* Check sum(nbs) = Nbs */
3633       PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
3634     }
3635 
3636     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3637     for (i=0; i<mbs; i++) {
3638       PetscCall(MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */
3639       nnz = nnz/bs;
3640       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3641       PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz));
3642       PetscCall(MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL));
3643     }
3644     PetscCall(PetscFree(bindx));
3645 
3646     PetscCall(MatCreate(comm,outmat));
3647     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
3648     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
3649     PetscCall(MatSetType(*outmat,MATBAIJ));
3650     PetscCall(MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz));
3651     PetscCall(MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz));
3652     MatPreallocateEnd(dnz,onz);
3653     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
3654   }
3655 
3656   /* numeric phase */
3657   PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
3658   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
3659 
3660   for (i=0; i<m; i++) {
3661     PetscCall(MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values));
3662     Ii   = i + rstart;
3663     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
3664     PetscCall(MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values));
3665   }
3666   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
3667   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
3668   PetscFunctionReturn(0);
3669 }
3670