xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision e2606525cfbd3bdd7da360804ad5c3bf3ebbf4fb)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
14   PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
15   PetscScalar       *va,*vv;
16   Vec               vB,vA;
17   const PetscScalar *vb;
18 
19   PetscFunctionBegin;
20   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
21   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
22 
23   PetscCall(VecGetArrayWrite(vA,&va));
24   if (idx) {
25     for (i=0; i<m; i++) {
26       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
27     }
28   }
29 
30   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
31   PetscCall(PetscMalloc1(m,&idxb));
32   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
33 
34   PetscCall(VecGetArrayWrite(v,&vv));
35   PetscCall(VecGetArrayRead(vB,&vb));
36   for (i=0; i<m; i++) {
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
38       vv[i] = vb[i];
39       if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
40     } else {
41       vv[i] = va[i];
42       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
43         idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
44     }
45   }
46   PetscCall(VecRestoreArrayWrite(vA,&vv));
47   PetscCall(VecRestoreArrayWrite(vA,&va));
48   PetscCall(VecRestoreArrayRead(vB,&vb));
49   PetscCall(PetscFree(idxb));
50   PetscCall(VecDestroy(&vA));
51   PetscCall(VecDestroy(&vB));
52   PetscFunctionReturn(0);
53 }
54 
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58 
59   PetscFunctionBegin;
60   PetscCall(MatStoreValues(aij->A));
61   PetscCall(MatStoreValues(aij->B));
62   PetscFunctionReturn(0);
63 }
64 
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
68 
69   PetscFunctionBegin;
70   PetscCall(MatRetrieveValues(aij->A));
71   PetscCall(MatRetrieveValues(aij->B));
72   PetscFunctionReturn(0);
73 }
74 
75 /*
76      Local utility routine that creates a mapping from the global column
77    number to the local number in the off-diagonal part of the local
78    storage of the matrix.  This is done in a non scalable way since the
79    length of colmap equals the global matrix length.
80 */
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs; /* nbs: number of block columns in the off-diagonal part */

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash table from (global block column + 1) to (local point column + 1);
     the +1 offsets allow 0 to mean "absent" */
  PetscCall(PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap));
  for (i=0; i<nbs; i++) {
    PetscCall(PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES));
  }
#else
  /* dense variant: array of length Nbs+1 (global number of block columns);
     0 means "absent", otherwise the stored value is local point column + 1 */
  PetscCall(PetscCalloc1(baij->Nbs+1,&baij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt)));
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
100 
/*
   Inserts one scalar (point index row,col; local numbering) into the
   diagonal block A of the MPIBAIJ matrix.  Uses a binary search followed
   by a short linear scan over the block row to find/insert block column
   col/bs; reallocates the row via MatSeqXAIJReallocateAIJ when a new
   nonzero block is needed (disallowed when a->nonew == -1, silently
   skipped when a->nonew == 1).  orow/ocol are the original global indices,
   used only in the error message.  Relies on the local variables declared
   in MatSetValues_MPIBAIJ() (rp, ap, bap, brow, bcol, ridx, cidx, ...).
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
136 
/*
   Same as MatSetValues_SeqBAIJ_A_Private() but targets the off-diagonal
   block B of the MPIBAIJ matrix (arrays bj/bi/ba/bimax/bilen, flag
   b->nonew).  See the comment on the A variant for the search/insert
   strategy.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)));\
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
172 
/*
   MatSetValues for MATMPIBAIJ with point (not block) indices.

   Locally owned rows go directly into the diagonal (A) or off-diagonal
   (B) sequential blocks through the MatSetValues_SeqBAIJ_{A,B}_Private()
   macros; off-process rows are accumulated in mat->stash and communicated
   at assembly time (unless donotstash is set).
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;  /* ownership range in point indices */
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are ignored */
        } else {
          PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          if (mat->was_assembled) {
            /* after assembly B uses compacted local columns; translate via colmap */
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col));
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* column not present in B: fall back to global numbering by
                 disassembling, then refresh the macro's cached B pointers */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else {
              PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j]%bs; /* point offset within the block */
            }
          } else col = in[j]; /* before first assembly B uses global columns */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off-process row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
257 
258 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
259 {
260   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
261   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
262   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
263   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
264   PetscBool         roworiented=a->roworiented;
265   const PetscScalar *value     = v;
266   MatScalar         *ap,*aa = a->a,*bap;
267 
268   PetscFunctionBegin;
269   rp   = aj + ai[row];
270   ap   = aa + bs2*ai[row];
271   rmax = imax[row];
272   nrow = ailen[row];
273   value = v;
274   low = 0;
275   high = nrow;
276   while (high-low > 7) {
277     t = (low+high)/2;
278     if (rp[t] > col) high = t;
279     else             low  = t;
280   }
281   for (i=low; i<high; i++) {
282     if (rp[i] > col) break;
283     if (rp[i] == col) {
284       bap = ap +  bs2*i;
285       if (roworiented) {
286         if (is == ADD_VALUES) {
287           for (ii=0; ii<bs; ii++) {
288             for (jj=ii; jj<bs2; jj+=bs) {
289               bap[jj] += *value++;
290             }
291           }
292         } else {
293           for (ii=0; ii<bs; ii++) {
294             for (jj=ii; jj<bs2; jj+=bs) {
295               bap[jj] = *value++;
296             }
297           }
298         }
299       } else {
300         if (is == ADD_VALUES) {
301           for (ii=0; ii<bs; ii++,value+=bs) {
302             for (jj=0; jj<bs; jj++) {
303               bap[jj] += value[jj];
304             }
305             bap += bs;
306           }
307         } else {
308           for (ii=0; ii<bs; ii++,value+=bs) {
309             for (jj=0; jj<bs; jj++) {
310               bap[jj]  = value[jj];
311             }
312             bap += bs;
313           }
314         }
315       }
316       goto noinsert2;
317     }
318   }
319   if (nonew == 1) goto noinsert2;
320   PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
321   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
322   N = nrow++ - 1; high++;
323   /* shift up all the later entries in this row */
324   PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
325   PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
326   rp[i] = col;
327   bap   = ap +  bs2*i;
328   if (roworiented) {
329     for (ii=0; ii<bs; ii++) {
330       for (jj=ii; jj<bs2; jj+=bs) {
331         bap[jj] = *value++;
332       }
333     }
334   } else {
335     for (ii=0; ii<bs; ii++) {
336       for (jj=0; jj<bs; jj++) {
337         *bap++ = *value++;
338       }
339     }
340   }
341   noinsert2:;
342   ailen[row] = nrow;
343   PetscFunctionReturn(0);
344 }
345 
346 /*
347     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
348     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
349 */
/*
   MatSetValuesBlocked for MATMPIBAIJ (block indices im[]/in[], bs x bs
   blocks of values in v).  Locally owned block rows are routed to the
   diagonal (A) or off-diagonal (B) sequential block via
   MatSetValuesBlocked_SeqBAIJ_Inlined(); off-process rows go to the
   block stash.  When the input layout allows it, blocks are passed
   through without copying; otherwise they are gathered into the
   scratch buffer baij->barray.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;  /* bs2-sized scratch block */
  PetscBool         roworiented = baij->roworiented;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs; /* ownership range in BLOCK indices */
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* lazily allocate the per-matrix scratch block, reused across calls */
    PetscCall(PetscMalloc1(bs2,&barray));
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows/columns of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored */
    PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          /* gather the strided block into the contiguous scratch buffer */
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal part */
          col  = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block columns are ignored */
        } else {
          PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
          if (mat->was_assembled) {
            /* after assembly B uses compacted local block columns; translate via colmap */
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
              PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
            col  = (col - 1)/bs; /* colmap stores point columns; convert to block */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* new off-diagonal block column: revert B to global numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col  =  in[j];
            } else PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix",im[i],in[j]);
          } else col = in[j]; /* before first assembly B uses global block columns */
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
        }
      }
    } else { /* off-process block row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
442 
/* Multiplicative hashing for the optional hash-table insertion path:
   HASH_KEY is the golden-ratio conjugate; HASH(size,key,tmp) maps key into
   [0,size) using the fractional part of key*HASH_KEY (tmp is a caller-
   supplied PetscReal scratch variable, evaluated twice). */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   Hash-table variant of MatSetValues for MATMPIBAIJ (point indices).
   Requires the table built by MatCreateHashTable_MPIBAIJ_Private(): each
   existing block (brow,bcol) is keyed by brow*Nbs+bcol+1 and HD[] points
   at its bs2 values.  Only existing nonzero blocks can be written; a key
   miss after linear probing is an error.  Off-process rows are stashed.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart; /* ownership range in point indices */
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; /* probe statistics (debug only) */

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned row */
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up PetscInto the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1; /* key of the (block row, block col) pair */
        h1  = HASH(size,key,tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* same probing as below, but also counts probes for statistics */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            /* linear probe forward from h1, wrapping once around the table */
            for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == size) {
              for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
              PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        /* blocks are stored column-major: offset = (col%bs)*bs + row%bs */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) { /* off-process row: stash for assembly */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}
511 
512 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
513 {
514   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
515   PetscBool         roworiented = baij->roworiented;
516   PetscInt          i,j,ii,jj,row,col;
517   PetscInt          rstart=baij->rstartbs;
518   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
519   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
520   PetscReal         tmp;
521   MatScalar         **HD = baij->hd,*baij_a;
522   const PetscScalar *v_t,*value;
523   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
524 
525   PetscFunctionBegin;
526   if (roworiented) stepval = (n-1)*bs;
527   else stepval = (m-1)*bs;
528 
529   for (i=0; i<m; i++) {
530     if (PetscDefined(USE_DEBUG)) {
531       PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,im[i]);
532       PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
533     }
534     row = im[i];
535     v_t = v + i*nbs2;
536     if (row >= rstart && row < rend) {
537       for (j=0; j<n; j++) {
538         col = in[j];
539 
540         /* Look up into the Hash Table */
541         key = row*Nbs+col+1;
542         h1  = HASH(size,key,tmp);
543 
544         idx = h1;
545         if (PetscDefined(USE_DEBUG)) {
546           total_ct++;
547           insert_ct++;
548           if (HT[idx] != key) {
549             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
550             if (idx == size) {
551               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
552               PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
553             }
554           }
555         } else if (HT[idx] != key) {
556           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
557           if (idx == size) {
558             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
559             PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
560           }
561         }
562         baij_a = HD[idx];
563         if (roworiented) {
564           /*value = v + i*(stepval+bs)*bs + j*bs;*/
565           /* value = v + (i*(stepval+bs)+j)*bs; */
566           value = v_t;
567           v_t  += bs;
568           if (addv == ADD_VALUES) {
569             for (ii=0; ii<bs; ii++,value+=stepval) {
570               for (jj=ii; jj<bs2; jj+=bs) {
571                 baij_a[jj] += *value++;
572               }
573             }
574           } else {
575             for (ii=0; ii<bs; ii++,value+=stepval) {
576               for (jj=ii; jj<bs2; jj+=bs) {
577                 baij_a[jj] = *value++;
578               }
579             }
580           }
581         } else {
582           value = v + j*(stepval+bs)*bs + i*bs;
583           if (addv == ADD_VALUES) {
584             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
585               for (jj=0; jj<bs; jj++) {
586                 baij_a[jj] += *value++;
587               }
588             }
589           } else {
590             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
591               for (jj=0; jj<bs; jj++) {
592                 baij_a[jj] = *value++;
593               }
594             }
595           }
596         }
597       }
598     } else {
599       if (!baij->donotstash) {
600         if (roworiented) {
601           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
602         } else {
603           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
604         }
605       }
606     }
607   }
608   if (PetscDefined(USE_DEBUG)) {
609     baij->ht_total_ct  += total_ct;
610     baij->ht_insert_ct += insert_ct;
611   }
612   PetscFunctionReturn(0);
613 }
614 
/*
   MatGetValues for MATMPIBAIJ (point indices).  Only locally owned rows
   may be queried; columns in the diagonal range are read from baij->A,
   others are translated through colmap and read from baij->B (returning
   0.0 for structurally absent entries).
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) { /* locally owned row */
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column in the diagonal block */
          col  = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j));
        } else {
          /* off-diagonal column: translate the global block column to B's
             compacted local numbering via colmap */
          if (!baij->colmap) {
            PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data));
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* verify the block column really exists in B; otherwise the entry is 0 */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs; /* add point offset within the block */
            PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
654 
/*
   MatNorm for MATMPIBAIJ.  Supports NORM_FROBENIUS, NORM_1 (max column
   sum) and NORM_INFINITY (max row sum); each is computed over the local
   A and B blocks and combined with an MPI reduction.  The NORM_1 path
   allocates two arrays of global column length, so it is not scalable.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: the diagonal block is the whole matrix */
    PetscCall(MatNorm(baij->A,type,nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over all local entries of A and B, then reduce */
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      /* tmp accumulates per-global-column absolute sums (length N: not scalable) */
      PetscCall(PetscCalloc1(mat->cmap->N,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N,&tmp2));
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j; /* off-diagonal columns map through garray */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs,&sums)); /* one partial sum per point row within a block row */
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        /* accumulate |a_ij| over the diagonal block of this block row */
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        /* ... and over the off-diagonal block of the same block row */
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
746 
747 /*
748   Creates the hash table, and sets the table
749   This table is created only once.
750   If new entried need to be added to the matrix
751   then the hash table has to be destroyed and
752   recreated.
753 */
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;   /* HT[i]: key of the block stored at slot i (0 = empty) */
  MatScalar      **HD;      /* HD[i]: pointer to that block's bs2 values */
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;      /* collision count and longest probe sequence */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0); /* table already built; built only once */

  /* table size = factor * total number of nonzero blocks (factor > 1 keeps load below 1) */
  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht));
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;       /* global block row */
      col = aj[j]+cstart;   /* global block column (diagonal part) */

      key = row*Nbs + col + 1; /* +1 so key 0 can mean "empty slot" */
      h1  = HASH(ht_size,key,tmp);
      /* linear probing with wrap-around until a free slot is found */
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]]; /* off-diagonal columns map through garray to global */
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat,"Average Search = %5.2g,max search = %" PetscInt_FMT "\n",(!j) ? (double)0.0:(double)(((PetscReal)(ct+j))/(double)j),max));
#endif
  PetscFunctionReturn(0);
}
837 
838 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
839 {
840   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
841   PetscInt       nstash,reallocs;
842 
843   PetscFunctionBegin;
844   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
845 
846   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
847   PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs));
848   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
849   PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
850   PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs));
851   PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
852   PetscFunctionReturn(0);
853 }
854 
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* Drain the point stash: each received message is split into runs of
       consecutive entries with the same row so every run is inserted with a
       single MatSetValues() call. */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* the block stash carries bs2 scalar values per (row,col) block */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A,mode));
  PetscCall(MatAssemblyEnd(baij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) {
      PetscCall(MatDisAssemble_MPIBAIJ(mat));
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first assembly: set up the scatter context used by MatMult() and friends */
    PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  }
  PetscCall(MatAssemblyBegin(baij->B,mode));
  PetscCall(MatAssemblyEnd(baij->B,mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    /* hash-table insertion was requested (MAT_USE_HASH_TABLE): build the table
       and switch to the hash-table based MatSetValues() variants */
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* MatGetRow() work space is invalidated by assembly; it is rebuilt on demand */
  PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}
967 
968 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
969 #include <petscdraw.h>
/* Viewer for ASCII, draw, and socket viewers: for info formats prints local
   statistics; otherwise gathers the whole matrix onto rank 0 and views it there. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank statistics: overall, diagonal block, off-diagonal block */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",
                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory));
      PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A,M,N,M,N));     /* rank 0 owns everything */
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    PetscCall(MatSetType(A,MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL));
    PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    PetscCall(PetscMalloc1(bs,&rvals));

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        /* insert the bs x bs block one column at a time */
        for (k=0; k<bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
          col++; a += bs;
        }
      }
    }
    /* copy over the B part; global columns come from garray */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
          col++; a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat,&matname));
    if (rank == 0) {
      /* rank 0 owns the entire gathered matrix, so view its diagonal block */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1086 
/* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Writes the matrix in the PETSc binary format: header (classid, M, N, nz),
   point-row lengths, column indices, then values — expanding each bs x bs
   block into point entries with columns in ascending global order. */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
  PetscInt       *rowlens,*colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs*bs*(A->nz + B->nz);   /* local point nonzeros */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  /* header[3] becomes the global nonzero count on rank 0 */
  PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m,&rowlens));
  for (cnt=0, i=0; i<A->mbs; i++)
    for (j=0; j<bs; j++)
      /* each of the bs point rows in block row i has the same length */
      rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz,&colidxs));
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      /* B columns left of the diagonal block come first ... */
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
      }
      /* ... then the (diagonal-block) A columns ... */
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*A->j[ja] + l + cs;
      /* ... then the remaining B columns right of the diagonal block */
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, in the same B-left / A / B-right order;
     blocks are stored column-major, hence the a[bs*(bs*j + l) + k] indexing */
  PetscCall(PetscMalloc1(nz,&matvals));
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat,viewer));
  PetscFunctionReturn(0);
}
1168 
1169 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1170 {
1171   PetscBool      iascii,isdraw,issocket,isbinary;
1172 
1173   PetscFunctionBegin;
1174   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1175   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1176   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1177   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1178   if (iascii || isdraw || issocket) {
1179     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer));
1180   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat,viewer));
1181   PetscFunctionReturn(0);
1182 }
1183 
/* Destroys all storage owned by the parallel matrix: the two sequential
   blocks, the column map, the communication objects, work arrays, the hash
   table, and finally resets the composed methods installed at creation. */
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd,baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));    /* must come after all uses of baij */

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  /* remove the composed functions installed by MatCreate_MPIBAIJ() */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiadj_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiaij_C",NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL));
  PetscFunctionReturn(0);
}
1226 
1227 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1228 {
1229   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1230   PetscInt       nt;
1231 
1232   PetscFunctionBegin;
1233   PetscCall(VecGetLocalSize(xx,&nt));
1234   PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1235   PetscCall(VecGetLocalSize(yy,&nt));
1236   PetscCheck(nt == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1237   PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1238   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
1239   PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1240   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
1241   PetscFunctionReturn(0);
1242 }
1243 
/* zz = yy + A*xx: same overlap of communication and diagonal-block work as
   MatMult_MPIBAIJ(), with the add folded into the local products. */
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  /* start fetching ghost values of xx while the diagonal block works */
  PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
  PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  /* zz += B*lvec (off-diagonal contribution) */
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
  PetscFunctionReturn(0);
}
1255 
/* yy = A^T*xx: local transpose products first, then a reverse scatter adds
   the off-diagonal partial results into the owning ranks' entries of yy. */
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}
1270 
/* zz = yy + A^T*xx: like MatMultTranspose_MPIBAIJ() with the add folded
   into the local transpose product. */
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}
1285 
1286 /*
1287   This only works correctly for square matrices where the subblock A->A is the
1288    diagonal block
1289 */
1290 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1291 {
1292   PetscFunctionBegin;
1293   PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1294   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ*)A->data)->A,v));
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1299 {
1300   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1301 
1302   PetscFunctionBegin;
1303   PetscCall(MatScale(a->A,aa));
1304   PetscCall(MatScale(a->B,aa));
1305   PetscFunctionReturn(0);
1306 }
1307 
/* Returns one locally-owned row (point indexing) with globally ascending
   column indices, merging the rows of the diagonal (A) and off-diagonal (B)
   blocks. The returned arrays live in matrix-owned work space released by
   MatRestoreRow_MPIBAIJ(). */
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt    bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt    nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt    *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      /* block count of row i across both blocks */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices));
  }
  lrow = row - brstart;   /* local point row */

  /* only request from the blocks what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;   /* local block column of B -> global block column */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;   /* first B entry at/after the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all A entries, then the remaining B entries */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested: find the split point here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}
1383 
1384 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1385 {
1386   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1387 
1388   PetscFunctionBegin;
1389   PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1390   baij->getrowactive = PETSC_FALSE;
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1395 {
1396   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1397 
1398   PetscFunctionBegin;
1399   PetscCall(MatZeroEntries(l->A));
1400   PetscCall(MatZeroEntries(l->B));
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1405 {
1406   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1407   Mat            A  = a->A,B = a->B;
1408   PetscLogDouble isend[5],irecv[5];
1409 
1410   PetscFunctionBegin;
1411   info->block_size = (PetscReal)matin->rmap->bs;
1412 
1413   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1414 
1415   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1416   isend[3] = info->memory;  isend[4] = info->mallocs;
1417 
1418   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1419 
1420   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1421   isend[3] += info->memory;  isend[4] += info->mallocs;
1422 
1423   if (flag == MAT_LOCAL) {
1424     info->nz_used      = isend[0];
1425     info->nz_allocated = isend[1];
1426     info->nz_unneeded  = isend[2];
1427     info->memory       = isend[3];
1428     info->mallocs      = isend[4];
1429   } else if (flag == MAT_GLOBAL_MAX) {
1430     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1431 
1432     info->nz_used      = irecv[0];
1433     info->nz_allocated = irecv[1];
1434     info->nz_unneeded  = irecv[2];
1435     info->memory       = irecv[3];
1436     info->mallocs      = irecv[4];
1437   } else if (flag == MAT_GLOBAL_SUM) {
1438     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1439 
1440     info->nz_used      = irecv[0];
1441     info->nz_allocated = irecv[1];
1442     info->nz_unneeded  = irecv[2];
1443     info->memory       = irecv[3];
1444     info->mallocs      = irecv[4];
1445   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1446   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1447   info->fill_ratio_needed = 0;
1448   info->factor_mallocs    = 0;
1449   PetscFunctionReturn(0);
1450 }
1451 
/* Applies a matrix option, forwarding it to whichever sequential block(s)
   the option is meaningful for, or recording it in the parallel context. */
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* structural options apply to both blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* remembered here as well because MatSetValues_MPIBAIJ consults it */
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    /* table itself is built lazily at the end of assembly */
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  /* symmetry-type options are only relevant to the diagonal block */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1498 
/* Forms the transpose by inserting each local bs x bs block of A and B into
   a new matrix with row and column roles swapped; preallocation is unknown,
   so insertion may trigger mallocs. */
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    /* transposed sizes: local column count becomes the local row count */
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  PetscCall(PetscMalloc1(bs,&rvals));

  for (i=0; i<mbs; i++) {
    /* rvals holds the bs consecutive global point rows of block row i;
       they become column indices of B */
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      /* insert the block one original column (= one B row) at a time */
      for (k=0; k<bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES));

        col++; a += bs;
      }
    }
  }
  /* copy over the B part; global columns come from garray */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  /* for MAT_INPLACE_MATRIX the new matrix replaces A's internals */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
1561 
1562 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1563 {
1564   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1565   Mat            a     = baij->A,b = baij->B;
1566   PetscInt       s1,s2,s3;
1567 
1568   PetscFunctionBegin;
1569   PetscCall(MatGetLocalSize(mat,&s2,&s3));
1570   if (rr) {
1571     PetscCall(VecGetLocalSize(rr,&s1));
1572     PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1573     /* Overlap communication with computation. */
1574     PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1575   }
1576   if (ll) {
1577     PetscCall(VecGetLocalSize(ll,&s1));
1578     PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1579     PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
1580   }
1581   /* scale  the diagonal block */
1582   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
1583 
1584   if (rr) {
1585     /* Do a scatter end and then right scale the off-diagonal block */
1586     PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1587     PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec));
1588   }
1589   PetscFunctionReturn(0);
1590 }
1591 
/*
   Zeros the given global rows of the matrix, optionally placing diag on the
   diagonal of each zeroed row and fixing the right-hand side b so that x
   remains a solution of the modified system.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
  PetscInt      *lrows;
  PetscInt       r, len;
  PetscBool      cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  /* fix right hand side if needed: set b[i] = diag*x[i] on each zeroed local row */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x,&xx));
    PetscCall(VecGetArray(b,&bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x,&xx));
    PetscCall(VecRestoreArray(b,&bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  if ((diag != 0.0) && cong) {
    /* congruent row/column layouts: the diagonal entries live in l->A, set them directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert diagonal entries one by one,
       which requires the matrix to allow new nonzero locations */
    PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL));
    PetscCheck(!((Mat_SeqBAIJ*)l->A->data)->nonew,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES));
    }
    /* re-assemble since MatSetValues() may have created new nonzeros (possibly in l->B) */
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1649 
/*
   Zeros the given global rows AND the corresponding columns, optionally
   placing diag on the diagonal and adjusting b so x stays a solution.
   Rows may be specified on any process; ownership is resolved with a PetscSF.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n,p = 0;
  PetscInt          i,j,k,r,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  /* NOTE(review): the reduced values are the (nonnegative) global row indices combined
     with MPI_LOR into lrows initialized to -1; only the "was this root touched" flag
     (lrows[r] >= 0) is used afterwards — confirm against PetscSFReduce semantics */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  /* build a mask vector that is 1 in every zeroed row, then scatter it so each
     process knows which of its ghost columns correspond to zeroed rows */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost values of x are needed to update b for the zeroed columns below */
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    /* number of scalar entries in this scalar row: blocks in block-row (row/bs) times bs */
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
    /* point at the (row%bs)-th scalar lane of the first block; successive entries of
       the lane are bs apart in the column-oriented bs*bs block storage */
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          /* entry (i, col) lies in a zeroed column: fold its contribution into b and zero it */
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          if (x) bb[i] -= aa[0]*xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1744 
1745 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1746 {
1747   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1748 
1749   PetscFunctionBegin;
1750   PetscCall(MatSetUnfactored(a->A));
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1755 
1756 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1757 {
1758   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1759   Mat            a,b,c,d;
1760   PetscBool      flg;
1761 
1762   PetscFunctionBegin;
1763   a = matA->A; b = matA->B;
1764   c = matB->A; d = matB->B;
1765 
1766   PetscCall(MatEqual(a,c,&flg));
1767   if (flg) {
1768     PetscCall(MatEqual(b,d,&flg));
1769   }
1770   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
1771   PetscFunctionReturn(0);
1772 }
1773 
1774 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1775 {
1776   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1777   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1778 
1779   PetscFunctionBegin;
1780   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1781   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1782     PetscCall(MatCopy_Basic(A,B,str));
1783   } else {
1784     PetscCall(MatCopy(a->A,b->A,str));
1785     PetscCall(MatCopy(a->B,b->B,str));
1786   }
1787   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1788   PetscFunctionReturn(0);
1789 }
1790 
/* Sets up the matrix with default preallocation, keeping the block size already set on A. */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}
1797 
1798 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1799 {
1800   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1801   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1802   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1803 
1804   PetscFunctionBegin;
1805   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1810 {
1811   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1812   PetscBLASInt   bnz,one=1;
1813   Mat_SeqBAIJ    *x,*y;
1814   PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;
1815 
1816   PetscFunctionBegin;
1817   if (str == SAME_NONZERO_PATTERN) {
1818     PetscScalar alpha = a;
1819     x    = (Mat_SeqBAIJ*)xx->A->data;
1820     y    = (Mat_SeqBAIJ*)yy->A->data;
1821     PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
1822     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1823     x    = (Mat_SeqBAIJ*)xx->B->data;
1824     y    = (Mat_SeqBAIJ*)yy->B->data;
1825     PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
1826     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1827     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1828   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1829     PetscCall(MatAXPY_Basic(Y,a,X,str));
1830   } else {
1831     Mat      B;
1832     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1833     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
1834     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
1835     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
1836     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
1837     PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
1838     PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
1839     PetscCall(MatSetType(B,MATMPIBAIJ));
1840     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d));
1841     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
1842     PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o));
1843     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1844     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
1845     PetscCall(MatHeaderMerge(Y,&B));
1846     PetscCall(PetscFree(nnz_d));
1847     PetscCall(PetscFree(nnz_o));
1848   }
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1853 {
1854   PetscFunctionBegin;
1855   if (PetscDefined(USE_COMPLEX)) {
1856     Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data;
1857 
1858     PetscCall(MatConjugate_SeqBAIJ(a->A));
1859     PetscCall(MatConjugate_SeqBAIJ(a->B));
1860   }
1861   PetscFunctionReturn(0);
1862 }
1863 
1864 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1865 {
1866   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1867 
1868   PetscFunctionBegin;
1869   PetscCall(MatRealPart(a->A));
1870   PetscCall(MatRealPart(a->B));
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1875 {
1876   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1877 
1878   PetscFunctionBegin;
1879   PetscCall(MatImaginaryPart(a->A));
1880   PetscCall(MatImaginaryPart(a->B));
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1885 {
1886   IS             iscol_local;
1887   PetscInt       csize;
1888 
1889   PetscFunctionBegin;
1890   PetscCall(ISGetLocalSize(iscol,&csize));
1891   if (call == MAT_REUSE_MATRIX) {
1892     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
1893     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1894   } else {
1895     PetscCall(ISAllGather(iscol,&iscol_local));
1896   }
1897   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat));
1898   if (call == MAT_INITIAL_MATRIX) {
1899     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
1900     PetscCall(ISDestroy(&iscol_local));
1901   }
1902   PetscFunctionReturn(0);
1903 }
1904 
1905 /*
1906   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1907   in local and then by concatenating the local matrices the end result.
1908   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1909   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1910 */
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  /* convert the scalar index sets to block index sets */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new));
  PetscCall(ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new));

  if (call ==  MAT_REUSE_MATRIX) {
    /* recover the sequential submatrix stashed on *newmat by a previous call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat,&bs));
  PetscCall(MatGetSize(Mreuse,&m,&n));
  /* convert scalar sizes of the sequential submatrix to block counts */
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible across processes */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m+1,&dlens,m+1,&olens));
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    /* this routine serves both BAIJ and SBAIJ (see header comment); only the
       preallocation call matching M's actual type takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens));
    PetscCall(MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens));
    PetscCall(PetscFree2(dlens,olens));
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    /* NOTE(review): ml is a scalar (not block) local row count while m was divided
       by bs above — confirm this check is exercised with bs > 1 */
    PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* values below are laid out block-column-major inside each block */
  PetscCall(MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz*bs*bs;
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(0);
}
2034 
2035 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2036 {
2037   MPI_Comm       comm,pcomm;
2038   PetscInt       clocal_size,nrows;
2039   const PetscInt *rows;
2040   PetscMPIInt    size;
2041   IS             crowp,lcolp;
2042 
2043   PetscFunctionBegin;
2044   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2045   /* make a collective version of 'rowp' */
2046   PetscCall(PetscObjectGetComm((PetscObject)rowp,&pcomm));
2047   if (pcomm==comm) {
2048     crowp = rowp;
2049   } else {
2050     PetscCall(ISGetSize(rowp,&nrows));
2051     PetscCall(ISGetIndices(rowp,&rows));
2052     PetscCall(ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp));
2053     PetscCall(ISRestoreIndices(rowp,&rows));
2054   }
2055   PetscCall(ISSetPermutation(crowp));
2056   /* make a local version of 'colp' */
2057   PetscCall(PetscObjectGetComm((PetscObject)colp,&pcomm));
2058   PetscCallMPI(MPI_Comm_size(pcomm,&size));
2059   if (size==1) {
2060     lcolp = colp;
2061   } else {
2062     PetscCall(ISAllGather(colp,&lcolp));
2063   }
2064   PetscCall(ISSetPermutation(lcolp));
2065   /* now we just get the submatrix */
2066   PetscCall(MatGetLocalSize(A,NULL,&clocal_size));
2067   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B));
2068   /* clean up */
2069   if (pcomm!=comm) {
2070     PetscCall(ISDestroy(&crowp));
2071   }
2072   if (size>1) {
2073     PetscCall(ISDestroy(&lcolp));
2074   }
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2079 {
2080   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2081   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2082 
2083   PetscFunctionBegin;
2084   if (nghosts) *nghosts = B->nbs;
2085   if (ghosts) *ghosts = baij->garray;
2086   PetscFunctionReturn(0);
2087 }
2088 
/*
   Builds, on every process, a sequential AIJ matrix holding the full
   (block-row) nonzero structure of the parallel matrix; numerical values
   are not set. Column indices are assembled locally in sorted order and
   then gathered with MPI_Allgatherv directly into the new matrix's arrays.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscMPIInt    size,rank,*recvcounts = NULL,*displs = NULL;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* lens[i] = blocks in block-row i = (diag part) + (off-diag part); filled
     locally for owned rows, then completed by the in-place allgatherv below */
  PetscCall(PetscMalloc1(A->rmap->N/bs,&lens));
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  /* one allocation serves both recvcounts and displs */
  PetscCall(PetscMalloc1(2*size,&recvcounts));
  displs    = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A)));
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  PetscCall(MatCreate(PETSC_COMM_SELF,&B));
  PetscCall(MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE));
  PetscCall(MatSetType(B,MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B,0,lens));
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  /* write directly into B's j array at the offset of this process's rows;
     the allgatherv below fills in the other processes' segments in place */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  PetscCheck(cnt == sendcount,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT,sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  /* counts/offsets are now in units of nonzeros rather than rows */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A)));
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen,lens,A->rmap->N/bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  /* propagate symmetry information from A to the structure matrix */
  if (A->symmetric) PetscCall(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE));
  else if (A->hermitian) PetscCall(MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE));
  else if (A->structurally_symmetric) PetscCall(MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE));
  *newmat = B;
  PetscFunctionReturn(0);
}
2193 
/*
   Applies (local) SOR relaxation. Only the local variants are supported in
   parallel: each outer iteration scatters the current x to ghost storage,
   computes bb1 = bb - B*x (the off-process contribution moved to the rhs),
   and runs the sequential SOR of the diagonal block on bb1.
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  Vec            bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* delegate directly to the diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever a modified rhs must be formed: more than one
     iteration, or a nonzero initial guess (~flag & SOR_ZERO_INITIAL_GUESS
     is nonzero exactly when the SOR_ZERO_INITIAL_GUESS bit is NOT set) */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration: x = 0, so no ghost contribution; sweep on bb directly */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(0);
}
2263 
2264 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions)
2265 {
2266   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2267   PetscInt       m,N,i,*garray = aij->garray;
2268   PetscInt       ib,jb,bs = A->rmap->bs;
2269   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2270   MatScalar      *a_val = a_aij->a;
2271   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2272   MatScalar      *b_val = b_aij->a;
2273   PetscReal      *work;
2274 
2275   PetscFunctionBegin;
2276   PetscCall(MatGetSize(A,&m,&N));
2277   PetscCall(PetscCalloc1(N,&work));
2278   if (type == NORM_2) {
2279     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2280       for (jb=0; jb<bs; jb++) {
2281         for (ib=0; ib<bs; ib++) {
2282           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2283           a_val++;
2284         }
2285       }
2286     }
2287     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2288       for (jb=0; jb<bs; jb++) {
2289         for (ib=0; ib<bs; ib++) {
2290           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2291           b_val++;
2292         }
2293       }
2294     }
2295   } else if (type == NORM_1) {
2296     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2297       for (jb=0; jb<bs; jb++) {
2298         for (ib=0; ib<bs; ib++) {
2299           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2300           a_val++;
2301         }
2302       }
2303     }
2304     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2305       for (jb=0; jb<bs; jb++) {
2306        for (ib=0; ib<bs; ib++) {
2307           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2308           b_val++;
2309         }
2310       }
2311     }
2312   } else if (type == NORM_INFINITY) {
2313     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2314       for (jb=0; jb<bs; jb++) {
2315         for (ib=0; ib<bs; ib++) {
2316           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2317           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2318           a_val++;
2319         }
2320       }
2321     }
2322     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2323       for (jb=0; jb<bs; jb++) {
2324         for (ib=0; ib<bs; ib++) {
2325           int col = garray[b_aij->j[i]] * bs + jb;
2326           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2327           b_val++;
2328         }
2329       }
2330     }
2331   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2332     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2333       for (jb=0; jb<bs; jb++) {
2334         for (ib=0; ib<bs; ib++) {
2335           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2336           a_val++;
2337         }
2338       }
2339     }
2340     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2341       for (jb=0; jb<bs; jb++) {
2342        for (ib=0; ib<bs; ib++) {
2343           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2344           b_val++;
2345         }
2346       }
2347     }
2348   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2349     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2350       for (jb=0; jb<bs; jb++) {
2351         for (ib=0; ib<bs; ib++) {
2352           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2353           a_val++;
2354         }
2355       }
2356     }
2357     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2358       for (jb=0; jb<bs; jb++) {
2359        for (ib=0; ib<bs; ib++) {
2360           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2361           b_val++;
2362         }
2363       }
2364     }
2365   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
2366   if (type == NORM_INFINITY) {
2367     PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
2368   } else {
2369     PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
2370   }
2371   PetscCall(PetscFree(work));
2372   if (type == NORM_2) {
2373     for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2374   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2375     for (i=0; i<N; i++) reductions[i] /= m;
2376   }
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2381 {
2382   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2383 
2384   PetscFunctionBegin;
2385   PetscCall(MatInvertBlockDiagonal(a->A,values));
2386   A->factorerrortype             = a->A->factorerrortype;
2387   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2388   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2393 {
2394   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2395   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2396 
2397   PetscFunctionBegin;
2398   if (!Y->preallocated) {
2399     PetscCall(MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL));
2400   } else if (!aij->nz) {
2401     PetscInt nonew = aij->nonew;
2402     PetscCall(MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL));
2403     aij->nonew = nonew;
2404   }
2405   PetscCall(MatShift_Basic(Y,a));
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2410 {
2411   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2412 
2413   PetscFunctionBegin;
2414   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2415   PetscCall(MatMissingDiagonal(a->A,missing,d));
2416   if (d) {
2417     PetscInt rstart;
2418     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2419     *d += rstart/A->rmap->bs;
2420 
2421   }
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2426 {
2427   PetscFunctionBegin;
2428   *a = ((Mat_MPIBAIJ*)A->data)->A;
2429   PetscFunctionReturn(0);
2430 }
2431 
/* -------------------------------------------------------------------*/
/* Operation table for MATMPIBAIJ.  The entries are POSITIONAL: slot N must
   match slot N of struct _MatOps (see petsc/private/matimpl.h); the numbered
   comments mark every fifth slot.  NULL means the operation is not provided
   by this matrix type.  Do not reorder entries. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                /*104*/NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                /*119*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                /*124*/NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL
};
2584 
2585 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2586 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2587 
/*
  MatMPIBAIJSetPreallocationCSR_MPIBAIJ - implementation of
  MatMPIBAIJSetPreallocationCSR() for MATMPIBAIJ.

  ii/jj describe the local rows in block-CSR form (block row i owns entries
  ii[i]..ii[i+1]-1 of jj; all indices are in block units and 0-based).  V, if
  given, supplies bs*bs values per block; its in-block layout depends on the
  matrix's row-oriented option.  Counts the diagonal/off-diagonal blocks per
  row, preallocates, then inserts the values and assembles.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
  const PetscInt *JJ    =NULL;
  PetscScalar    *values=NULL;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  /* Fix the block size on both layouts and finalize them before converting
     any sizes/offsets to block units below */
  PetscCall(PetscLayoutSetBlockSize(B->rmap,bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap,bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); /* layout may have adjusted bs */
  m      = B->rmap->n/bs;       /* local block rows */
  rstart = B->rmap->rstart/bs;  /* first owned global block row */
  cstart = B->cmap->rstart/bs;  /* owned block-column range [cstart,cend) */
  cend   = B->cmap->rend/bs;

  PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
  PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz));
  /* First pass: per block row, split the column count into diagonal-part
     (columns inside [cstart,cend)) and off-diagonal-part blocks */
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
    nz_max = PetscMax(nz_max,nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz));
  PetscCall(PetscFree2(d_nnz,o_nnz));

  values = (PetscScalar*)V;
  if (!values) {
    /* No values supplied: insert zeros, sized for the widest row */
    PetscCall(PetscCalloc1(bs*bs*nz_max,&values));
  }
  /* Second pass: insert the numerical values block row by block row */
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES));
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values)); /* free only the zero buffer we allocated */
  /* CSR input is purely local, so suppress the off-process stash during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* The nonzero structure is now exact; new locations would be an error */
  PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
2658 
2659 /*@C
2660    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2661 
2662    Collective
2663 
2664    Input Parameters:
2665 +  B - the matrix
2666 .  bs - the block size
2667 .  i - the indices into j for the start of each local row (starts with zero)
2668 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2669 -  v - optional values in the matrix
2670 
2671    Level: advanced
2672 
2673    Notes:
2674     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2675    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2676    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2677    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2678    block column and the second index is over columns within a block.
2679 
2680    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2681 
2682 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
2683 @*/
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2); /* bs must be the same on every process */
  /* Dispatch to the type-specific implementation, if the type registered one */
  PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
  PetscFunctionReturn(0);
}
2693 
2694 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2695 {
2696   Mat_MPIBAIJ    *b;
2697   PetscInt       i;
2698   PetscMPIInt    size;
2699 
2700   PetscFunctionBegin;
2701   PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
2702   PetscCall(PetscLayoutSetUp(B->rmap));
2703   PetscCall(PetscLayoutSetUp(B->cmap));
2704   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
2705 
2706   if (d_nnz) {
2707     for (i=0; i<B->rmap->n/bs; i++) {
2708       PetscCheck(d_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,d_nnz[i]);
2709     }
2710   }
2711   if (o_nnz) {
2712     for (i=0; i<B->rmap->n/bs; i++) {
2713       PetscCheck(o_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,o_nnz[i]);
2714     }
2715   }
2716 
2717   b      = (Mat_MPIBAIJ*)B->data;
2718   b->bs2 = bs*bs;
2719   b->mbs = B->rmap->n/bs;
2720   b->nbs = B->cmap->n/bs;
2721   b->Mbs = B->rmap->N/bs;
2722   b->Nbs = B->cmap->N/bs;
2723 
2724   for (i=0; i<=b->size; i++) {
2725     b->rangebs[i] = B->rmap->range[i]/bs;
2726   }
2727   b->rstartbs = B->rmap->rstart/bs;
2728   b->rendbs   = B->rmap->rend/bs;
2729   b->cstartbs = B->cmap->rstart/bs;
2730   b->cendbs   = B->cmap->rend/bs;
2731 
2732 #if defined(PETSC_USE_CTABLE)
2733   PetscCall(PetscTableDestroy(&b->colmap));
2734 #else
2735   PetscCall(PetscFree(b->colmap));
2736 #endif
2737   PetscCall(PetscFree(b->garray));
2738   PetscCall(VecDestroy(&b->lvec));
2739   PetscCall(VecScatterDestroy(&b->Mvctx));
2740 
2741   /* Because the B will have been resized we simply destroy it and create a new one each time */
2742   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
2743   PetscCall(MatDestroy(&b->B));
2744   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
2745   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
2746   PetscCall(MatSetType(b->B,MATSEQBAIJ));
2747   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
2748 
2749   if (!B->preallocated) {
2750     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
2751     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
2752     PetscCall(MatSetType(b->A,MATSEQBAIJ));
2753     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
2754     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash));
2755   }
2756 
2757   PetscCall(MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz));
2758   PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz));
2759   B->preallocated  = PETSC_TRUE;
2760   B->was_assembled = PETSC_FALSE;
2761   B->assembled     = PETSC_FALSE;
2762   PetscFunctionReturn(0);
2763 }
2764 
2765 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2766 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2767 
/*
  MatConvert_MPIBAIJ_MPIAdj - builds an MPIAdj adjacency graph over the block
  rows/columns of a MATMPIBAIJ matrix, omitting diagonal entries.  The ii/jj
  arrays are handed to MatCreateMPIAdj, which takes ownership of them.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* First pass: row offsets = diagonal-part count + off-diagonal-part count */
  PetscCall(PetscMalloc1(M+1,&ii));
  ii[0] = 0;
  for (i=0; i<M; i++) {
    PetscCheck((id[i+1] - id[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,id[i],id[i+1]);
    PetscCheck((io[i+1] - io[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* exclude the diagonal entry from the count if this row stores one */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  /* Second pass: emit each row's columns in ascending global order:
     off-diagonal columns left of the owned range, then the owned
     (diagonal-block) columns minus the diagonal itself, then the rest */
  PetscCall(PetscMalloc1(ii[M],&jj));
  cnt  = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break; /* reached columns right of the owned range */
      jj[cnt++] = garray[jo[j]];
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) { /* skip the diagonal entry */
        jj[cnt++] = rstart + jd[k];
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  /* MatCreateMPIAdj takes ownership of ii and jj; do not free them here */
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj));
  PetscFunctionReturn(0);
}
2806 
2807 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2808 
2809 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2810 
/*
  MatConvert_MPIBAIJ_MPIAIJ - converts a MATMPIBAIJ matrix to MATMPIAIJ by
  converting its sequential diagonal and off-diagonal parts individually.
  Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and MAT_INPLACE_MATRIX.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  Mat_MPIAIJ  *b;
  Mat         B;

  PetscFunctionBegin;
  PetscCheck(A->assembled,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* Create an empty MPIAIJ shell with matching layout; its A/B parts are
       replaced by the converted sequential parts below */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetType(B,MATMPIAIJ));
    PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
    PetscCall(MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
    PetscCall(MatMPIAIJSetPreallocation(B,0,NULL,0,NULL));
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* NOTE(review): MatDisAssemble_MPIBAIJ temporarily modifies A's
       off-diagonal part; A is restored by the reassembly just below */
    PetscCall(MatDisAssemble_MPIBAIJ(A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
  }
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* In-place conversion: B's guts replace A's, and B's shell is destroyed */
    PetscCall(MatHeaderReplace(A,&B));
  } else {
   *newmat = B;
  }
  PetscFunctionReturn(0);
}
2854 
2855 /*MC
2856    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2857 
2858    Options Database Keys:
2859 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2860 . -mat_block_size <bs> - set the blocksize used to store the matrix
2861 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2862 - -mat_use_hash_table <fact> - set hash table factor
2863 
2864    Level: beginner
2865 
2866    Notes:
    MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2869 
2870 .seealso: `MatCreateBAIJ`
2871 M*/
2872 
2873 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2874 
/*
  MatCreate_MPIBAIJ - constructor registered for MATMPIBAIJ.  Allocates the
  implementation struct, installs the operation table, initializes all fields
  to their empty state, and registers the composed conversion/preallocation
  methods.  Preallocation happens later, via MatMPIBAIJSetPreallocation().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(B,&b));
  B->data = (void*)b;

  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size+1,&b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register type conversions and type-specific methods resolved by name */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ));

  /* optional hash-table-based assembly, enabled from the options database */
  PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL));
    if (fact <= 1.0) fact = 1.39; /* factors <= 1.0 are invalid; fall back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B,fact));
    PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}
2954 
2955 /*MC
2956    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2957 
2958    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2959    and MATMPIBAIJ otherwise.
2960 
2961    Options Database Keys:
2962 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2963 
2964   Level: beginner
2965 
2966 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2967 M*/
2968 
2969 /*@C
2970    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2971    (block compressed row).  For good matrix assembly performance
2972    the user should preallocate the matrix storage by setting the parameters
2973    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2974    performance can be increased by more than a factor of 50.
2975 
2976    Collective on Mat
2977 
2978    Input Parameters:
2979 +  B - the matrix
2980 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2981           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2982 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2983            submatrix  (same for all local rows)
2984 .  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
2986            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
2987            set it even if it is zero.
2988 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2989            submatrix (same for all local rows).
2990 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2991            off-diagonal portion of the local submatrix (possibly different for
2992            each block row) or NULL.
2993 
2994    If the *_nnz parameter is given then the *_nz parameter is ignored
2995 
2996    Options Database Keys:
2997 +   -mat_block_size - size of the blocks to use
2998 -   -mat_use_hash_table <fact> - set hash table factor
2999 
3000    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3003 
3004    Storage Information:
3005    For a square global matrix we define each processor's diagonal portion
3006    to be its local rows and the corresponding columns (a square submatrix);
3007    each processor's off-diagonal portion encompasses the remainder of the
3008    local matrix (a rectangular submatrix).
3009 
3010    The user can specify preallocated storage for the diagonal part of
3011    the local submatrix with either d_nz or d_nnz (not both).  Set
3012    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3013    memory allocation.  Likewise, specify preallocated storage for the
3014    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3015 
3016    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3017    the figure below we depict these three local rows and all columns (0-11).
3018 
3019 .vb
3020            0 1 2 3 4 5 6 7 8 9 10 11
3021           --------------------------
3022    row 3  |o o o d d d o o o o  o  o
3023    row 4  |o o o d d d o o o o  o  o
3024    row 5  |o o o d d d o o o o  o  o
3025           --------------------------
3026 .ve
3027 
3028    Thus, any entries in the d locations are stored in the d (diagonal)
3029    submatrix, and any entries in the o locations are stored in the
3030    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3031    stored simply in the MATSEQBAIJ format for compressed row storage.
3032 
3033    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3034    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3035    In general, for PDE problems in which most nonzeros are near the diagonal,
3036    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3037    or you will get TERRIBLE performance; see the users' manual chapter on
3038    matrices.
3039 
3040    You can call MatGetInfo() to get information on how effective the preallocation was;
3041    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3042    You can also run with the option -info and look for messages with the string
3043    malloc in them to see if additional memory allocation was needed.
3044 
3045    Level: intermediate
3046 
3047 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3048 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* validate the matrix object and require that bs is the same on every rank */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation; a silent no-op for matrix types
     that do not provide MatMPIBAIJSetPreallocation_C */
  PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}
3058 
3059 /*@C
3060    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3061    (block compressed row).  For good matrix assembly performance
3062    the user should preallocate the matrix storage by setting the parameters
3063    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3064    performance can be increased by more than a factor of 50.
3065 
3066    Collective
3067 
3068    Input Parameters:
3069 +  comm - MPI communicator
3070 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3071           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3072 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3073            This value should be the same as the local size used in creating the
3074            y vector for the matrix-vector product y = Ax.
3075 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3076            This value should be the same as the local size used in creating the
3077            x vector for the matrix-vector product y = Ax.
3078 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3079 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3080 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3081            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3086 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3087            submatrix (same for all local rows).
3088 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3089            off-diagonal portion of the local submatrix (possibly different for
3090            each block row) or NULL.
3091 
3092    Output Parameter:
3093 .  A - the matrix
3094 
3095    Options Database Keys:
3096 +   -mat_block_size - size of the blocks to use
3097 -   -mat_use_hash_table <fact> - set hash table factor
3098 
3099    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3100    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3101    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3102 
3103    Notes:
3104    If the *_nnz parameter is given then the *_nz parameter is ignored
3105 
   A nonzero block is any block that has 1 or more nonzeros in it
3107 
3108    The user MUST specify either the local or global matrix dimensions
3109    (possibly both).
3110 
3111    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3113 
3114    Storage Information:
3115    For a square global matrix we define each processor's diagonal portion
3116    to be its local rows and the corresponding columns (a square submatrix);
3117    each processor's off-diagonal portion encompasses the remainder of the
3118    local matrix (a rectangular submatrix).
3119 
3120    The user can specify preallocated storage for the diagonal part of
3121    the local submatrix with either d_nz or d_nnz (not both).  Set
3122    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3123    memory allocation.  Likewise, specify preallocated storage for the
3124    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3125 
3126    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3127    the figure below we depict these three local rows and all columns (0-11).
3128 
3129 .vb
3130            0 1 2 3 4 5 6 7 8 9 10 11
3131           --------------------------
3132    row 3  |o o o d d d o o o o  o  o
3133    row 4  |o o o d d d o o o o  o  o
3134    row 5  |o o o d d d o o o o  o  o
3135           --------------------------
3136 .ve
3137 
3138    Thus, any entries in the d locations are stored in the d (diagonal)
3139    submatrix, and any entries in the o locations are stored in the
3140    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3141    stored simply in the MATSEQBAIJ format for compressed row storage.
3142 
3143    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3144    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3145    In general, for PDE problems in which most nonzeros are near the diagonal,
3146    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3147    or you will get TERRIBLE performance; see the users' manual chapter on
3148    matrices.
3149 
3150    Level: intermediate
3151 
3152 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3153 @*/
3154 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3155 {
3156   PetscMPIInt    size;
3157 
3158   PetscFunctionBegin;
3159   PetscCall(MatCreate(comm,A));
3160   PetscCall(MatSetSizes(*A,m,n,M,N));
3161   PetscCallMPI(MPI_Comm_size(comm,&size));
3162   if (size > 1) {
3163     PetscCall(MatSetType(*A,MATMPIBAIJ));
3164     PetscCall(MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz));
3165   } else {
3166     PetscCall(MatSetType(*A,MATSEQBAIJ));
3167     PetscCall(MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz));
3168   }
3169   PetscFunctionReturn(0);
3170 }
3171 
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = NULL;
  /* create an empty matrix of the same communicator, sizes, and type as matin */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));

  /* the duplicate is produced already assembled, bypassing MatAssemblyBegin/End */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the block-dimension bookkeeping (block size squared, local/global block counts) */
  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share the row/column layouts by reference rather than rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-use scratch state starts out empty in the duplicate */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  /* only the configuration (flag/factor) is copied; the table itself is rebuilt on demand */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1));
  /* deep-copy the off-diagonal column map, if it exists */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs));
#endif
  } else a->colmap = NULL;

  /* deep-copy the local-to-global map of the off-diagonal block columns (garray) */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* fresh stash, duplicated communication vector/scatter for MatMult */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));

  /* duplicate the diagonal (A) and off-diagonal (B) sequential blocks; cpvalues
     controls whether numerical values are copied or only the nonzero pattern */
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
3253 
3254 /* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
  PetscInt       *rowidxs,*colidxs,rs,cs,ce;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  /* a negative nz marks a special on-disk format this loader does not understand */
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set local sizes if not set already */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
  PetscCall(MatGetBlockSize(mat,&bs));
  PetscCall(MatGetLocalSize(mat,&m,&n));
  PetscCall(PetscLayoutGetRange(mat->rmap,&rs,NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap,&cs,&ce));
  mbs = m/bs; nbs = n/bs;

  /* read in row lengths and build row indices */
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  /* prefix-sum the row lengths into CSR row offsets for this rank's rows */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));

  { /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij,done;
    PetscInt   *d_nnz,*o_nnz;

    PetscCall(PetscBTCreate(nbs,&bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij));
    /* count, per block row i, the distinct nonzero block columns in the diagonal
       (d_nnz) and off-diagonal (o_nnz) portions; bt/ht are reset for every block row */
    for (i=0; i<mbs; i++) {
      PetscCall(PetscBTMemzero(nbs,bt));
      PetscCall(PetscHSetIClear(ht));
      for (k=0; k<bs; k++) {
        PetscInt row = bs*i + k;
        for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
          PetscInt col = colidxs[j];
          /* for SBAIJ only the upper-triangular part (col >= row) is stored */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht,col/bs,&done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* this routine serves both MPIBAIJ and MPISBAIJ; each call below is a
       PetscTryMethod-style no-op for the type it does not match */
    PetscCall(MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree2(d_nnz,o_nnz));
  }

  /* store matrix values */
  for (i=0; i<m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
    PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
3351 
3352 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3353 {
3354   PetscBool isbinary;
3355 
3356   PetscFunctionBegin;
3357   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
3358   PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3359   PetscCall(MatLoad_MPIBAIJ_Binary(mat,viewer));
3360   PetscFunctionReturn(0);
3361 }
3362 
3363 /*@
3364    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3365 
3366    Input Parameters:
3367 +  mat  - the matrix
3368 -  fact - factor
3369 
3370    Not Collective, each process can use a different factor
3371 
3372    Level: advanced
3373 
3374   Notes:
3375    This can also be set by the command line option: -mat_use_hash_table <fact>
3376 
3377 .seealso: `MatSetOption()`
3378 @*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscFunctionBegin;
  /* forward to the type-specific implementation; silently ignored for matrix
     types that do not register MatSetHashTableFactor_C */
  PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));
  PetscFunctionReturn(0);
}
3385 
3386 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3387 {
3388   Mat_MPIBAIJ *baij;
3389 
3390   PetscFunctionBegin;
3391   baij          = (Mat_MPIBAIJ*)mat->data;
3392   baij->ht_fact = fact;
3393   PetscFunctionReturn(0);
3394 }
3395 
3396 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3397 {
3398   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3399   PetscBool    flg;
3400 
3401   PetscFunctionBegin;
3402   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg));
3403   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3404   if (Ad)     *Ad     = a->A;
3405   if (Ao)     *Ao     = a->B;
3406   if (colmap) *colmap = a->garray;
3407   PetscFunctionReturn(0);
3408 }
3409 
3410 /*
3411     Special version for direct calls from Fortran (to eliminate two function call overheads
3412 */
3413 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3414 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3415 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3416 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3417 #endif
3418 
3419 /*@C
3420   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3421 
3422   Collective on Mat
3423 
3424   Input Parameters:
3425 + mat - the matrix
3426 . min - number of input rows
3427 . im - input rows
3428 . nin - number of input columns
3429 . in - input columns
3430 . v - numerical values input
3431 - addvin - INSERT_VALUES or ADD_VALUES
3432 
3433   Notes:
3434     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3435 
3436   Level: advanced
3437 
3438 .seealso: `MatSetValuesBlocked()`
3439 @*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  PetscCheck(!mat->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues,mat,0,0,0));

  /* lazily allocate the bs x bs scratch block used when an element copy is required */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2,&barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows/columns of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block-row indices are silently skipped */
    PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is owned by this rank */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the bs x bs block into contiguous storage in barray */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column lies in the diagonal portion */
          col  = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block-column indices are silently skipped */
        } else {
          /* block column lies in the off-diagonal portion */
          PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
          if (mat->was_assembled) {
            /* translate the global block column to B's local numbering via the colmap */
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
              PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* column not in the current pattern: fall back to disassembled state */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col  =  in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
        }
      }
    } else {
      /* block row belongs to another rank: stash for communication during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues,mat,0,0,0));
  PetscFunctionReturn(0);
}
3550 
3551 /*@
3552      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3553          CSR format the local rows.
3554 
3555    Collective
3556 
3557    Input Parameters:
3558 +  comm - MPI communicator
3559 .  bs - the block size, only a block size of 1 is supported
3560 .  m - number of local rows (Cannot be PETSC_DECIDE)
3561 .  n - This value should be the same as the local size used in creating the
3562        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3563        calculated if N is given) For square matrices n is almost always m.
3564 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3565 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in block row row-1 of the matrix
3567 .   j - column indices
3568 -   a - matrix values
3569 
3570    Output Parameter:
3571 .   mat - the matrix
3572 
3573    Level: intermediate
3574 
3575    Notes:
3576        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3577      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3578      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3579 
3580      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3581      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3582      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3583      with column-major ordering within blocks.
3584 
3585        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3586 
3587 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3588           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3589 @*/
PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat,bs));
  PetscCall(MatSetUp(*mat));
  /* the input values are stored column-major within each block, so insertion is
     temporarily switched to column orientation while the CSR data is copied in */
  PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a));
  PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE));
  PetscFunctionReturn(0);
}
3605 
3606 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3607 {
3608   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3609   PetscInt       *indx;
3610   PetscScalar    *values;
3611 
3612   PetscFunctionBegin;
3613   PetscCall(MatGetSize(inmat,&m,&N));
3614   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3615     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3616     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3617     PetscInt       *bindx,rmax=a->rmax,j;
3618     PetscMPIInt    rank,size;
3619 
3620     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
3621     mbs = m/bs; Nbs = N/cbs;
3622     if (n == PETSC_DECIDE) {
3623       PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N));
3624     }
3625     nbs = n/cbs;
3626 
3627     PetscCall(PetscMalloc1(rmax,&bindx));
3628     MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */
3629 
3630     PetscCallMPI(MPI_Comm_rank(comm,&rank));
3631     PetscCallMPI(MPI_Comm_rank(comm,&size));
3632     if (rank == size-1) {
3633       /* Check sum(nbs) = Nbs */
3634       PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
3635     }
3636 
3637     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3638     for (i=0; i<mbs; i++) {
3639       PetscCall(MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */
3640       nnz = nnz/bs;
3641       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3642       PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz));
3643       PetscCall(MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL));
3644     }
3645     PetscCall(PetscFree(bindx));
3646 
3647     PetscCall(MatCreate(comm,outmat));
3648     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
3649     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
3650     PetscCall(MatSetType(*outmat,MATBAIJ));
3651     PetscCall(MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz));
3652     PetscCall(MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz));
3653     MatPreallocateEnd(dnz,onz);
3654     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
3655   }
3656 
3657   /* numeric phase */
3658   PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
3659   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
3660 
3661   for (i=0; i<m; i++) {
3662     PetscCall(MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values));
3663     Ii   = i + rstart;
3664     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
3665     PetscCall(MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values));
3666   }
3667   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
3668   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
3669   PetscFunctionReturn(0);
3670 }
3671