xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 8fb5bd83c3955fefcf33a54e3bb66920a9fa884b)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
14   PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
15   PetscScalar       *va,*vv;
16   Vec               vB,vA;
17   const PetscScalar *vb;
18 
19   PetscFunctionBegin;
20   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
21   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
22 
23   PetscCall(VecGetArrayWrite(vA,&va));
24   if (idx) {
25     for (i=0; i<m; i++) {
26       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
27     }
28   }
29 
30   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
31   PetscCall(PetscMalloc1(m,&idxb));
32   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
33 
34   PetscCall(VecGetArrayWrite(v,&vv));
35   PetscCall(VecGetArrayRead(vB,&vb));
36   for (i=0; i<m; i++) {
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
38       vv[i] = vb[i];
39       if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
40     } else {
41       vv[i] = va[i];
42       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
43         idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
44     }
45   }
46   PetscCall(VecRestoreArrayWrite(v,&vv));
47   PetscCall(VecRestoreArrayWrite(vA,&va));
48   PetscCall(VecRestoreArrayRead(vB,&vb));
49   PetscCall(PetscFree(idxb));
50   PetscCall(VecDestroy(&vA));
51   PetscCall(VecDestroy(&vB));
52   PetscFunctionReturn(0);
53 }
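/*
   Usage sketch (hypothetical caller, not part of this file): the routine above is
   reached through the generic MatGetRowMaxAbs() interface. The diagonal block a->A
   reports local column indices, shifted here by A->cmap->rstart; the off-diagonal
   block a->B reports indices into garray, which are mapped back to global columns.

     Vec      rowmax;
     PetscInt *loc;                                    length = local number of rows
     PetscCall(MatCreateVecs(A,NULL,&rowmax));         rowmax gets A's row layout
     PetscCall(PetscMalloc1(mlocal,&loc));             mlocal is assumed known
     PetscCall(MatGetRowMaxAbs(A,rowmax,loc));         loc[i] = global column of max |a_ij| in row i
     PetscCall(PetscFree(loc));
     PetscCall(VecDestroy(&rowmax));
*/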
54 
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58 
59   PetscFunctionBegin;
60   PetscCall(MatStoreValues(aij->A));
61   PetscCall(MatStoreValues(aij->B));
62   PetscFunctionReturn(0);
63 }
64 
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
68 
69   PetscFunctionBegin;
70   PetscCall(MatRetrieveValues(aij->A));
71   PetscCall(MatRetrieveValues(aij->B));
72   PetscFunctionReturn(0);
73 }
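/*
   Usage sketch (generic pattern, not specific to this file): both routines above
   simply forward to the sequential blocks. MatStoreValues()/MatRetrieveValues()
   require a frozen nonzero pattern, e.g. to reuse a Jacobian across solves:

     PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE));
     PetscCall(MatStoreValues(J));         save the current numerical values of J
     ... temporarily modify J, e.g. shift the diagonal for a trial factorization ...
     PetscCall(MatRetrieveValues(J));      restore the saved values
*/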
74 
75 /*
76      Local utility routine that creates a mapping from the global column
77    number to the local number in the off-diagonal part of the local
78    storage of the matrix.  This is done in a non-scalable way since the
79    length of colmap equals the global number of block columns.
80 */
81 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
82 {
83   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
84   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
85   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
86 
87   PetscFunctionBegin;
88 #if defined(PETSC_USE_CTABLE)
89   PetscCall(PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap));
90   for (i=0; i<nbs; i++) {
91     PetscCall(PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES));
92   }
93 #else
94   PetscCall(PetscCalloc1(baij->Nbs+1,&baij->colmap));
95   PetscCall(PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt)));
96   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
97 #endif
98   PetscFunctionReturn(0);
99 }
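/*
   Encoding convention (as established above): for a global block column g present
   in the off-diagonal block B, colmap stores bs*(local block column) + 1, so the
   zero left by PetscCalloc1(), or an absent PetscTable entry, means "not stored
   here". For example, with bs = 2 and garray = {3,7}:
     colmap[3] = 1   decodes to local point-column offset 0 (block 0 of B)
     colmap[7] = 3   decodes to local point-column offset 2 (block 1 of B)
     colmap[5] = 0   global block column 5 has no entries on this process
*/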
100 
101 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
102   { \
103     brow = row/bs;  \
104     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
105     rmax = aimax[brow]; nrow = ailen[brow]; \
106     bcol = col/bs; \
107     ridx = row % bs; cidx = col % bs; \
108     low  = 0; high = nrow; \
109     while (high-low > 3) { \
110       t = (low+high)/2; \
111       if (rp[t] > bcol) high = t; \
112       else              low  = t; \
113     } \
114     for (_i=low; _i<high; _i++) { \
115       if (rp[_i] > bcol) break; \
116       if (rp[_i] == bcol) { \
117         bap = ap +  bs2*_i + bs*cidx + ridx; \
118         if (addv == ADD_VALUES) *bap += value;  \
119         else                    *bap  = value;  \
120         goto a_noinsert; \
121       } \
122     } \
123     if (a->nonew == 1) goto a_noinsert; \
124     PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
125     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
126     N = nrow++ - 1;  \
127     /* shift up all the later entries in this row */ \
128     PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\
129     PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
130     PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
131     rp[_i]                      = bcol;  \
132     ap[bs2*_i + bs*cidx + ridx] = value;  \
133 a_noinsert:; \
134     ailen[brow] = nrow; \
135   }
136 
137 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
138   { \
139     brow = row/bs;  \
140     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
141     rmax = bimax[brow]; nrow = bilen[brow]; \
142     bcol = col/bs; \
143     ridx = row % bs; cidx = col % bs; \
144     low  = 0; high = nrow; \
145     while (high-low > 3) { \
146       t = (low+high)/2; \
147       if (rp[t] > bcol) high = t; \
148       else              low  = t; \
149     } \
150     for (_i=low; _i<high; _i++) { \
151       if (rp[_i] > bcol) break; \
152       if (rp[_i] == bcol) { \
153         bap = ap +  bs2*_i + bs*cidx + ridx; \
154         if (addv == ADD_VALUES) *bap += value;  \
155         else                    *bap  = value;  \
156         goto b_noinsert; \
157       } \
158     } \
159     if (b->nonew == 1) goto b_noinsert; \
160     PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
161     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
162     N = nrow++ - 1;  \
163     /* shift up all the later entries in this row */ \
164     PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\
165     PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)));\
166     PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
167     rp[_i]                      = bcol;  \
168     ap[bs2*_i + bs*cidx + ridx] = value;  \
169 b_noinsert:; \
170     bilen[brow] = nrow; \
171   }
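/*
   Both macros above implement the same insertion technique: a truncated binary
   search over the sorted block-column indices of one block row, finished by a
   linear scan, then either an in-place update or an insertion that shifts the tail
   of the row. A minimal standalone sketch of the search step (simplified, names
   assumed):

     PetscInt low = 0, high = nrow, t;
     while (high - low > 3) {             bisect until the window is small
       t = (low + high)/2;
       if (rp[t] > bcol) high = t;
       else              low  = t;
     }
     for (t = low; t < high; t++) {       short linear scan finishes the search
       if (rp[t] >= bcol) break;          t is the match (rp[t] == bcol) or the insertion point
     }
*/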
172 
173 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
174 {
175   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
176   MatScalar      value;
177   PetscBool      roworiented = baij->roworiented;
178   PetscInt       i,j,row,col;
179   PetscInt       rstart_orig=mat->rmap->rstart;
180   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
181   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
182 
183   /* Some Variables required in the macro */
184   Mat         A     = baij->A;
185   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
186   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
187   MatScalar   *aa   =a->a;
188 
189   Mat         B     = baij->B;
190   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
191   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
192   MatScalar   *ba   =b->a;
193 
194   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
195   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
196   MatScalar *ap,*bap;
197 
198   PetscFunctionBegin;
199   for (i=0; i<m; i++) {
200     if (im[i] < 0) continue;
201     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
202     if (im[i] >= rstart_orig && im[i] < rend_orig) {
203       row = im[i] - rstart_orig;
204       for (j=0; j<n; j++) {
205         if (in[j] >= cstart_orig && in[j] < cend_orig) {
206           col = in[j] - cstart_orig;
207           if (roworiented) value = v[i*n+j];
208           else             value = v[i+j*m];
209           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
210         } else if (in[j] < 0) continue;
211         else {
212           PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
213           if (mat->was_assembled) {
214             if (!baij->colmap) {
215               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
216             }
217 #if defined(PETSC_USE_CTABLE)
218             PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col));
219             col  = col - 1;
220 #else
221             col = baij->colmap[in[j]/bs] - 1;
222 #endif
223             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
224               PetscCall(MatDisAssemble_MPIBAIJ(mat));
225               col  =  in[j];
226               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
227               B    = baij->B;
228               b    = (Mat_SeqBAIJ*)(B)->data;
229               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
230               ba   =b->a;
231             } else { PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
232               col += in[j]%bs; }
233           } else col = in[j];
234           if (roworiented) value = v[i*n+j];
235           else             value = v[i+j*m];
236           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
237           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
238         }
239       }
240     } else {
241       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
242       if (!baij->donotstash) {
243         mat->assembled = PETSC_FALSE;
244         if (roworiented) {
245           PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE));
246         } else {
247           PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE));
248         }
249       }
250     }
251   }
252   PetscFunctionReturn(0);
253 }
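/*
   Usage sketch (hypothetical indices and values): callers pass global point
   indices; the routine above routes each entry to the diagonal block A, the
   off-diagonal block B (through colmap), or the stash when the row is owned by
   another process.

     PetscInt    rows[1] = {12}, cols[2] = {12,40};
     PetscScalar vals[2] = {4.0,-1.0};
     PetscCall(MatSetValues(mat,1,rows,2,cols,vals,ADD_VALUES));
     PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));     communicates stashed rows
     PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
*/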
254 
255 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
256 {
257   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
258   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
259   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
260   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
261   PetscBool         roworiented=a->roworiented;
262   const PetscScalar *value     = v;
263   MatScalar         *ap,*aa = a->a,*bap;
264 
265   PetscFunctionBegin;
266   rp   = aj + ai[row];
267   ap   = aa + bs2*ai[row];
268   rmax = imax[row];
269   nrow = ailen[row];
270   value = v;
271   low = 0;
272   high = nrow;
273   while (high-low > 7) {
274     t = (low+high)/2;
275     if (rp[t] > col) high = t;
276     else             low  = t;
277   }
278   for (i=low; i<high; i++) {
279     if (rp[i] > col) break;
280     if (rp[i] == col) {
281       bap = ap +  bs2*i;
282       if (roworiented) {
283         if (is == ADD_VALUES) {
284           for (ii=0; ii<bs; ii++) {
285             for (jj=ii; jj<bs2; jj+=bs) {
286               bap[jj] += *value++;
287             }
288           }
289         } else {
290           for (ii=0; ii<bs; ii++) {
291             for (jj=ii; jj<bs2; jj+=bs) {
292               bap[jj] = *value++;
293             }
294           }
295         }
296       } else {
297         if (is == ADD_VALUES) {
298           for (ii=0; ii<bs; ii++,value+=bs) {
299             for (jj=0; jj<bs; jj++) {
300               bap[jj] += value[jj];
301             }
302             bap += bs;
303           }
304         } else {
305           for (ii=0; ii<bs; ii++,value+=bs) {
306             for (jj=0; jj<bs; jj++) {
307               bap[jj]  = value[jj];
308             }
309             bap += bs;
310           }
311         }
312       }
313       goto noinsert2;
314     }
315   }
316   if (nonew == 1) goto noinsert2;
317   PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
318   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
319   N = nrow++ - 1; high++;
320   /* shift up all the later entries in this row */
321   PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
322   PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
323   rp[i] = col;
324   bap   = ap +  bs2*i;
325   if (roworiented) {
326     for (ii=0; ii<bs; ii++) {
327       for (jj=ii; jj<bs2; jj+=bs) {
328         bap[jj] = *value++;
329       }
330     }
331   } else {
332     for (ii=0; ii<bs; ii++) {
333       for (jj=0; jj<bs; jj++) {
334         *bap++ = *value++;
335       }
336     }
337   }
338   noinsert2:;
339   ailen[row] = nrow;
340   PetscFunctionReturn(0);
341 }
342 
343 /*
344     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
345     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
346 */
347 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
348 {
349   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
350   const PetscScalar *value;
351   MatScalar         *barray     = baij->barray;
352   PetscBool         roworiented = baij->roworiented;
353   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
354   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
355   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
356 
357   PetscFunctionBegin;
358   if (!barray) {
359     PetscCall(PetscMalloc1(bs2,&barray));
360     baij->barray = barray;
361   }
362 
363   if (roworiented) stepval = (n-1)*bs;
364   else stepval = (m-1)*bs;
365 
366   for (i=0; i<m; i++) {
367     if (im[i] < 0) continue;
368     PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
369     if (im[i] >= rstart && im[i] < rend) {
370       row = im[i] - rstart;
371       for (j=0; j<n; j++) {
372         /* If NumCol = 1 then a copy is not required */
373         if ((roworiented) && (n == 1)) {
374           barray = (MatScalar*)v + i*bs2;
375         } else if ((!roworiented) && (m == 1)) {
376           barray = (MatScalar*)v + j*bs2;
377         } else { /* Here a copy is required */
378           if (roworiented) {
379             value = v + (i*(stepval+bs) + j)*bs;
380           } else {
381             value = v + (j*(stepval+bs) + i)*bs;
382           }
383           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
384             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
385             barray += bs;
386           }
387           barray -= bs2;
388         }
389 
390         if (in[j] >= cstart && in[j] < cend) {
391           col  = in[j] - cstart;
392           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
393         } else if (in[j] < 0) continue;
394         else {
395           PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
396           if (mat->was_assembled) {
397             if (!baij->colmap) {
398               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
399             }
400 
401 #if defined(PETSC_USE_DEBUG)
402 #if defined(PETSC_USE_CTABLE)
403             { PetscInt data;
404               PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
405               PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
406             }
407 #else
408             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
409 #endif
410 #endif
411 #if defined(PETSC_USE_CTABLE)
412             PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
413             col  = (col - 1)/bs;
414 #else
415             col = (baij->colmap[in[j]] - 1)/bs;
416 #endif
417             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
418               PetscCall(MatDisAssemble_MPIBAIJ(mat));
419               col  =  in[j];
420             } else PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix",im[i],in[j]);
421           } else col = in[j];
422           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
423         }
424       }
425     } else {
426       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
427       if (!baij->donotstash) {
428         if (roworiented) {
429           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
430         } else {
431           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
432         }
433       }
434     }
435   }
436   PetscFunctionReturn(0);
437 }
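/*
   Usage sketch (hypothetical bs = 2 example): MatSetValuesBlocked() takes block row
   and column indices, with bs*bs values per block. In the default row-oriented
   ordering the block {{1,2},{3,4}} destined for block row 5, block column 9 is

     PetscScalar block[4] = {1.0,2.0,      first row of the 2x2 block
                             3.0,4.0};     second row of the 2x2 block
     PetscInt    brow = 5,bcol = 9;
     PetscCall(MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,INSERT_VALUES));

   The copy loop marked "Here a copy is required" above gathers one block
   contiguously from the strided input; the transpose into the column-major BAIJ
   layout happens inside MatSetValuesBlocked_SeqBAIJ_Inlined().
*/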
438 
439 #define HASH_KEY 0.6180339887
440 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
441 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
442 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
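/*
   HASH() above is multiplicative hashing: HASH_KEY is the fractional part of the
   golden ratio (Knuth's recommended multiplier) and the macro computes
   floor(size * frac(key * HASH_KEY)). Worked example (rounded): for size = 11 and
   key = 38, 38*0.6180339887 = 23.4853, frac = 0.4853, 11*0.4853 = 5.34, so the
   entry lands in slot 5. Collisions are resolved by the linear probing loops in
   the two _HT routines below.
*/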
443 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
444 {
445   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
446   PetscBool      roworiented = baij->roworiented;
447   PetscInt       i,j,row,col;
448   PetscInt       rstart_orig=mat->rmap->rstart;
449   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
450   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
451   PetscReal      tmp;
452   MatScalar      **HD = baij->hd,value;
453   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
454 
455   PetscFunctionBegin;
456   for (i=0; i<m; i++) {
457     if (PetscDefined(USE_DEBUG)) {
458       PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
459       PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
460     }
461     row = im[i];
462     if (row >= rstart_orig && row < rend_orig) {
463       for (j=0; j<n; j++) {
464         col = in[j];
465         if (roworiented) value = v[i*n+j];
466         else             value = v[i+j*m];
467         /* Look up into the Hash Table */
468         key = (row/bs)*Nbs+(col/bs)+1;
469         h1  = HASH(size,key,tmp);
470 
471         idx = h1;
472         if (PetscDefined(USE_DEBUG)) {
473           insert_ct++;
474           total_ct++;
475           if (HT[idx] != key) {
476             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
477             if (idx == size) {
478               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
479               PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
480             }
481           }
482         } else if (HT[idx] != key) {
483           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
484           if (idx == size) {
485             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
486             PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
487           }
488         }
489         /* A HASH table entry is found, so insert the values at the correct address */
490         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
491         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
492       }
493     } else if (!baij->donotstash) {
494       if (roworiented) {
495         PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE));
496       } else {
497         PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE));
498       }
499     }
500   }
501   if (PetscDefined(USE_DEBUG)) {
502     baij->ht_total_ct  += total_ct;
503     baij->ht_insert_ct += insert_ct;
504   }
505   PetscFunctionReturn(0);
506 }
507 
508 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
509 {
510   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
511   PetscBool         roworiented = baij->roworiented;
512   PetscInt          i,j,ii,jj,row,col;
513   PetscInt          rstart=baij->rstartbs;
514   PetscInt          rend  =baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
515   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
516   PetscReal         tmp;
517   MatScalar         **HD = baij->hd,*baij_a;
518   const PetscScalar *v_t,*value;
519   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
520 
521   PetscFunctionBegin;
522   if (roworiented) stepval = (n-1)*bs;
523   else stepval = (m-1)*bs;
524 
525   for (i=0; i<m; i++) {
526     if (PetscDefined(USE_DEBUG)) {
527       PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,im[i]);
528       PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
529     }
530     row = im[i];
531     v_t = v + i*nbs2;
532     if (row >= rstart && row < rend) {
533       for (j=0; j<n; j++) {
534         col = in[j];
535 
536         /* Look up into the Hash Table */
537         key = row*Nbs+col+1;
538         h1  = HASH(size,key,tmp);
539 
540         idx = h1;
541         if (PetscDefined(USE_DEBUG)) {
542           total_ct++;
543           insert_ct++;
544           if (HT[idx] != key) {
545             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
546             if (idx == size) {
547               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
548               PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
549             }
550           }
551         } else if (HT[idx] != key) {
552           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
553           if (idx == size) {
554             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
555             PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
556           }
557         }
558         baij_a = HD[idx];
559         if (roworiented) {
560           /*value = v + i*(stepval+bs)*bs + j*bs;*/
561           /* value = v + (i*(stepval+bs)+j)*bs; */
562           value = v_t;
563           v_t  += bs;
564           if (addv == ADD_VALUES) {
565             for (ii=0; ii<bs; ii++,value+=stepval) {
566               for (jj=ii; jj<bs2; jj+=bs) {
567                 baij_a[jj] += *value++;
568               }
569             }
570           } else {
571             for (ii=0; ii<bs; ii++,value+=stepval) {
572               for (jj=ii; jj<bs2; jj+=bs) {
573                 baij_a[jj] = *value++;
574               }
575             }
576           }
577         } else {
578           value = v + j*(stepval+bs)*bs + i*bs;
579           if (addv == ADD_VALUES) {
580             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
581               for (jj=0; jj<bs; jj++) {
582                 baij_a[jj] += *value++;
583               }
584             }
585           } else {
586             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
587               for (jj=0; jj<bs; jj++) {
588                 baij_a[jj] = *value++;
589               }
590             }
591           }
592         }
593       }
594     } else {
595       if (!baij->donotstash) {
596         if (roworiented) {
597           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
598         } else {
599           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
600         }
601       }
602     }
603   }
604   if (PetscDefined(USE_DEBUG)) {
605     baij->ht_total_ct  += total_ct;
606     baij->ht_insert_ct += insert_ct;
607   }
608   PetscFunctionReturn(0);
609 }
610 
611 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
612 {
613   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
614   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
615   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
616 
617   PetscFunctionBegin;
618   for (i=0; i<m; i++) {
619     if (idxm[i] < 0) continue; /* negative row */
620     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
621     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
622       row = idxm[i] - bsrstart;
623       for (j=0; j<n; j++) {
624         if (idxn[j] < 0) continue; /* negative column */
625         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
626         if (idxn[j] >= bscstart && idxn[j] < bscend) {
627           col  = idxn[j] - bscstart;
628           PetscCall(MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j));
629         } else {
630           if (!baij->colmap) {
631             PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
632           }
633 #if defined(PETSC_USE_CTABLE)
634           PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data));
635           data--;
636 #else
637           data = baij->colmap[idxn[j]/bs]-1;
638 #endif
639           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
640           else {
641             col  = data + idxn[j]%bs;
642             PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j));
643           }
644         }
645       }
646     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
647   }
648   PetscFunctionReturn(0);
649 }
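/*
   Usage sketch (hypothetical indices): only locally owned rows may be queried, as
   the SETERRQ above enforces; columns may fall in either block and are resolved
   through colmap/garray, with 0.0 returned for unallocated locations.

     PetscInt    row = rstart;                 any row in [rstart,rend)
     PetscInt    cols[2] = {0,17};
     PetscScalar vals[2];
     PetscCall(MatGetValues(mat,1,&row,2,cols,vals));
*/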
650 
651 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
652 {
653   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
654   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
655   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
656   PetscReal      sum = 0.0;
657   MatScalar      *v;
658 
659   PetscFunctionBegin;
660   if (baij->size == 1) {
661     PetscCall(MatNorm(baij->A,type,nrm));
662   } else {
663     if (type == NORM_FROBENIUS) {
664       v  = amat->a;
665       nz = amat->nz*bs2;
666       for (i=0; i<nz; i++) {
667         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
668       }
669       v  = bmat->a;
670       nz = bmat->nz*bs2;
671       for (i=0; i<nz; i++) {
672         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
673       }
674       PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
675       *nrm = PetscSqrtReal(*nrm);
676     } else if (type == NORM_1) { /* max column sum */
677       PetscReal *tmp,*tmp2;
678       PetscInt  *jj,*garray=baij->garray,cstart=baij->cstartbs;
679       PetscCall(PetscCalloc1(mat->cmap->N,&tmp));
680       PetscCall(PetscMalloc1(mat->cmap->N,&tmp2));
681       v    = amat->a; jj = amat->j;
682       for (i=0; i<amat->nz; i++) {
683         for (j=0; j<bs; j++) {
684           col = bs*(cstart + *jj) + j; /* column index */
685           for (row=0; row<bs; row++) {
686             tmp[col] += PetscAbsScalar(*v);  v++;
687           }
688         }
689         jj++;
690       }
691       v = bmat->a; jj = bmat->j;
692       for (i=0; i<bmat->nz; i++) {
693         for (j=0; j<bs; j++) {
694           col = bs*garray[*jj] + j;
695           for (row=0; row<bs; row++) {
696             tmp[col] += PetscAbsScalar(*v); v++;
697           }
698         }
699         jj++;
700       }
701       PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
702       *nrm = 0.0;
703       for (j=0; j<mat->cmap->N; j++) {
704         if (tmp2[j] > *nrm) *nrm = tmp2[j];
705       }
706       PetscCall(PetscFree(tmp));
707       PetscCall(PetscFree(tmp2));
708     } else if (type == NORM_INFINITY) { /* max row sum */
709       PetscReal *sums;
710       PetscCall(PetscMalloc1(bs,&sums));
711       sum  = 0.0;
712       for (j=0; j<amat->mbs; j++) {
713         for (row=0; row<bs; row++) sums[row] = 0.0;
714         v  = amat->a + bs2*amat->i[j];
715         nz = amat->i[j+1]-amat->i[j];
716         for (i=0; i<nz; i++) {
717           for (col=0; col<bs; col++) {
718             for (row=0; row<bs; row++) {
719               sums[row] += PetscAbsScalar(*v); v++;
720             }
721           }
722         }
723         v  = bmat->a + bs2*bmat->i[j];
724         nz = bmat->i[j+1]-bmat->i[j];
725         for (i=0; i<nz; i++) {
726           for (col=0; col<bs; col++) {
727             for (row=0; row<bs; row++) {
728               sums[row] += PetscAbsScalar(*v); v++;
729             }
730           }
731         }
732         for (row=0; row<bs; row++) {
733           if (sums[row] > sum) sum = sums[row];
734         }
735       }
736       PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
737       PetscCall(PetscFree(sums));
738     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
739   }
740   PetscFunctionReturn(0);
741 }
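/*
   Reference definitions for the three cases above (standard matrix norms):
     NORM_FROBENIUS : sqrt(sum_ij |a_ij|^2)     local sums of squares, reduced with MPI_SUM
     NORM_1         : max_j sum_i |a_ij|        per-column sums, reduced with MPI_SUM, then max
     NORM_INFINITY  : max_i sum_j |a_ij|        per-row sums, reduced with MPI_MAX
*/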
742 
743 /*
744   Creates and fills the hash table.
745   The table is created only once;
746   if new entries need to be added to the matrix,
747   the hash table has to be destroyed and
748   recreated.
749 */
750 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
751 {
752   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
753   Mat            A     = baij->A,B=baij->B;
754   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
755   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
756   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
757   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
758   PetscInt       *HT,key;
759   MatScalar      **HD;
760   PetscReal      tmp;
761 #if defined(PETSC_USE_INFO)
762   PetscInt ct=0,max=0;
763 #endif
764 
765   PetscFunctionBegin;
766   if (baij->ht) PetscFunctionReturn(0);
767 
768   baij->ht_size = (PetscInt)(factor*nz);
769   ht_size       = baij->ht_size;
770 
771   /* Allocate Memory for Hash Table */
772   PetscCall(PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht));
773   HD   = baij->hd;
774   HT   = baij->ht;
775 
776   /* Loop Over A */
777   for (i=0; i<a->mbs; i++) {
778     for (j=ai[i]; j<ai[i+1]; j++) {
779       row = i+rstart;
780       col = aj[j]+cstart;
781 
782       key = row*Nbs + col + 1;
783       h1  = HASH(ht_size,key,tmp);
784       for (k=0; k<ht_size; k++) {
785         if (!HT[(h1+k)%ht_size]) {
786           HT[(h1+k)%ht_size] = key;
787           HD[(h1+k)%ht_size] = a->a + j*bs2;
788           break;
789 #if defined(PETSC_USE_INFO)
790         } else {
791           ct++;
792 #endif
793         }
794       }
795 #if defined(PETSC_USE_INFO)
796       if (k > max) max = k;
797 #endif
798     }
799   }
800   /* Loop Over B */
801   for (i=0; i<b->mbs; i++) {
802     for (j=bi[i]; j<bi[i+1]; j++) {
803       row = i+rstart;
804       col = garray[bj[j]];
805       key = row*Nbs + col + 1;
806       h1  = HASH(ht_size,key,tmp);
807       for (k=0; k<ht_size; k++) {
808         if (!HT[(h1+k)%ht_size]) {
809           HT[(h1+k)%ht_size] = key;
810           HD[(h1+k)%ht_size] = b->a + j*bs2;
811           break;
812 #if defined(PETSC_USE_INFO)
813         } else {
814           ct++;
815 #endif
816         }
817       }
818 #if defined(PETSC_USE_INFO)
819       if (k > max) max = k;
820 #endif
821     }
822   }
823 
824   /* Print Summary */
825 #if defined(PETSC_USE_INFO)
826   for (i=0,j=0; i<ht_size; i++) {
827     if (HT[i]) j++;
828   }
829   PetscCall(PetscInfo(mat,"Average Search = %5.2g,max search = %" PetscInt_FMT "\n",(!j) ? (double)0.0:(double)(((PetscReal)(ct+j))/(double)j),max));
830 #endif
831   PetscFunctionReturn(0);
832 }
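/*
   Sizing note (follows from the loops above): the table uses open addressing with
   linear probing, and an entry that finds no empty slot after ht_size probes is
   silently dropped, so the oversizing factor passed in (baij->ht_fact) must exceed
   1.0. For example, factor = 1.6 leaves roughly 37% of the slots empty and keeps
   the average probe length reported by the PETSC_USE_INFO summary short.
*/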
833 
834 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
835 {
836   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
837   PetscInt       nstash,reallocs;
838 
839   PetscFunctionBegin;
840   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
841 
842   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
843   PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs));
844   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
845   PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
846   PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs));
847   PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
848   PetscFunctionReturn(0);
849 }
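/*
   Caller-side sketch of the split-phase assembly started above (generic PETSc
   pattern, not specific to this file):

     PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));   posts the stash sends
     ... unrelated local work may overlap the messages here ...
     PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));     receives and applies stashed values
*/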
850 
851 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
852 {
853   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
854   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
855   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
856   PetscInt       *row,*col;
857   PetscBool      r1,r2,r3,other_disassembled;
858   MatScalar      *val;
859   PetscMPIInt    n;
860 
861   PetscFunctionBegin;
862   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
863   if (!baij->donotstash && !mat->nooffprocentries) {
864     while (1) {
865       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
866       if (!flg) break;
867 
868       for (i=0; i<n;) {
869         /* Now identify the consecutive vals belonging to the same row */
870         for (j=i,rstart=row[j]; j<n; j++) {
871           if (row[j] != rstart) break;
872         }
873         if (j < n) ncols = j-i;
874         else       ncols = n-i;
875         /* Now assemble all these values with a single function call */
876         PetscCall(MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
877         i    = j;
878       }
879     }
880     PetscCall(MatStashScatterEnd_Private(&mat->stash));
881     /* Now process the block-stash. Since the values are stashed column-oriented,
882        temporarily clear the roworiented flags, and restore the original
883        flags after MatSetValues() */
884     r1 = baij->roworiented;
885     r2 = a->roworiented;
886     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
887 
888     baij->roworiented = PETSC_FALSE;
889     a->roworiented    = PETSC_FALSE;
890 
891     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
892     while (1) {
893       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg));
894       if (!flg) break;
895 
896       for (i=0; i<n;) {
897         /* Now identify the consecutive vals belonging to the same row */
898         for (j=i,rstart=row[j]; j<n; j++) {
899           if (row[j] != rstart) break;
900         }
901         if (j < n) ncols = j-i;
902         else       ncols = n-i;
903         PetscCall(MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode));
904         i    = j;
905       }
906     }
907     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
908 
909     baij->roworiented = r1;
910     a->roworiented    = r2;
911 
912     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
913   }
914 
915   PetscCall(MatAssemblyBegin(baij->A,mode));
916   PetscCall(MatAssemblyEnd(baij->A,mode));
917 
918   /* determine if any processor has disassembled, if so we must
919      also disassemble ourselves, in order that we may reassemble. */
920   /*
921      if nonzero structure of submatrix B cannot change then we know that
922      no processor disassembled thus we can skip this stuff
923   */
924   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
925     PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
926     if (mat->was_assembled && !other_disassembled) {
927       PetscCall(MatDisAssemble_MPIBAIJ(mat));
928     }
929   }
930 
931   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
932     PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
933   }
934   PetscCall(MatAssemblyBegin(baij->B,mode));
935   PetscCall(MatAssemblyEnd(baij->B,mode));
936 
937 #if defined(PETSC_USE_INFO)
938   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
939     PetscCall(PetscInfo(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));
940 
941     baij->ht_total_ct  = 0;
942     baij->ht_insert_ct = 0;
943   }
944 #endif
945   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
946     PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact));
947 
948     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
949     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
950   }
951 
952   PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
953 
954   baij->rowvalues = NULL;
955 
956   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
957   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
958     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
959     PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
960   }
961   PetscFunctionReturn(0);
962 }
963 
964 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
965 #include <petscdraw.h>
966 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
967 {
968   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
969   PetscMPIInt       rank = baij->rank;
970   PetscInt          bs   = mat->rmap->bs;
971   PetscBool         iascii,isdraw;
972   PetscViewer       sviewer;
973   PetscViewerFormat format;
974 
975   PetscFunctionBegin;
976   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
977   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
978   if (iascii) {
979     PetscCall(PetscViewerGetFormat(viewer,&format));
980     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
981       MatInfo info;
982       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
983       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
984       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
985       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",
986                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory));
987       PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info));
988       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
989       PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info));
990       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
991       PetscCall(PetscViewerFlush(viewer));
992       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
993       PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
994       PetscCall(VecScatterView(baij->Mvctx,viewer));
995       PetscFunctionReturn(0);
996     } else if (format == PETSC_VIEWER_ASCII_INFO) {
997       PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
998       PetscFunctionReturn(0);
999     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1000       PetscFunctionReturn(0);
1001     }
1002   }
1003 
1004   if (isdraw) {
1005     PetscDraw draw;
1006     PetscBool isnull;
1007     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
1008     PetscCall(PetscDrawIsNull(draw,&isnull));
1009     if (isnull) PetscFunctionReturn(0);
1010   }
1011 
1012   {
1013     /* assemble the entire matrix onto first processor. */
1014     Mat         A;
1015     Mat_SeqBAIJ *Aloc;
1016     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1017     MatScalar   *a;
1018     const char  *matname;
1019 
1020     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1021     /* Perhaps this should be the type of mat? */
1022     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
1023     if (rank == 0) {
1024       PetscCall(MatSetSizes(A,M,N,M,N));
1025     } else {
1026       PetscCall(MatSetSizes(A,0,0,M,N));
1027     }
1028     PetscCall(MatSetType(A,MATMPIBAIJ));
1029     PetscCall(MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL));
1030     PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
1031     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));
1032 
1033     /* copy over the A part */
1034     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1035     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1036     PetscCall(PetscMalloc1(bs,&rvals));
1037 
1038     for (i=0; i<mbs; i++) {
1039       rvals[0] = bs*(baij->rstartbs + i);
1040       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1041       for (j=ai[i]; j<ai[i+1]; j++) {
1042         col = (baij->cstartbs+aj[j])*bs;
1043         for (k=0; k<bs; k++) {
1044           PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
1045           col++; a += bs;
1046         }
1047       }
1048     }
1049     /* copy over the B part */
1050     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1051     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1052     for (i=0; i<mbs; i++) {
1053       rvals[0] = bs*(baij->rstartbs + i);
1054       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1055       for (j=ai[i]; j<ai[i+1]; j++) {
1056         col = baij->garray[aj[j]]*bs;
1057         for (k=0; k<bs; k++) {
1058           PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
1059           col++; a += bs;
1060         }
1061       }
1062     }
1063     PetscCall(PetscFree(rvals));
1064     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
1065     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
1066     /*
1067        Everyone has to call to draw the matrix since the graphics waits are
1068        synchronized across all processors that share the PetscDraw object
1069     */
1070     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
1071     PetscCall(PetscObjectGetName((PetscObject)mat,&matname));
1072     if (rank == 0) {
1073       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname));
1074       PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer));
1075     }
1076     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
1077     PetscCall(PetscViewerFlush(viewer));
1078     PetscCall(MatDestroy(&A));
1079   }
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1084 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1085 {
1086   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
1087   Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
1088   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
1089   const PetscInt *garray = aij->garray;
1090   PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
1091   PetscInt       *rowlens,*colidxs;
1092   PetscScalar    *matvals;
1093 
1094   PetscFunctionBegin;
1095   PetscCall(PetscViewerSetUp(viewer));
1096 
1097   M  = mat->rmap->N;
1098   N  = mat->cmap->N;
1099   m  = mat->rmap->n;
1100   rs = mat->rmap->rstart;
1101   cs = mat->cmap->rstart;
1102   bs = mat->rmap->bs;
1103   nz = bs*bs*(A->nz + B->nz);
1104 
1105   /* write matrix header */
1106   header[0] = MAT_FILE_CLASSID;
1107   header[1] = M; header[2] = N; header[3] = nz;
1108   PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
1109   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1110 
1111   /* fill in and store row lengths */
1112   PetscCall(PetscMalloc1(m,&rowlens));
1113   for (cnt=0, i=0; i<A->mbs; i++)
1114     for (j=0; j<bs; j++)
1115       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
1116   PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
1117   PetscCall(PetscFree(rowlens));
1118 
1119   /* fill in and store column indices */
1120   PetscCall(PetscMalloc1(nz,&colidxs));
1121   for (cnt=0, i=0; i<A->mbs; i++) {
1122     for (k=0; k<bs; k++) {
1123       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1124         if (garray[B->j[jb]] > cs/bs) break;
1125         for (l=0; l<bs; l++)
1126           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1127       }
1128       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1129         for (l=0; l<bs; l++)
1130           colidxs[cnt++] = bs*A->j[ja] + l + cs;
1131       for (; jb<B->i[i+1]; jb++)
1132         for (l=0; l<bs; l++)
1133           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1134     }
1135   }
1136   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1137   PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT));
1138   PetscCall(PetscFree(colidxs));
1139 
1140   /* fill in and store nonzero values */
1141   PetscCall(PetscMalloc1(nz,&matvals));
1142   for (cnt=0, i=0; i<A->mbs; i++) {
1143     for (k=0; k<bs; k++) {
1144       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1145         if (garray[B->j[jb]] > cs/bs) break;
1146         for (l=0; l<bs; l++)
1147           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1148       }
1149       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1150         for (l=0; l<bs; l++)
1151           matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
1152       for (; jb<B->i[i+1]; jb++)
1153         for (l=0; l<bs; l++)
1154           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1155     }
1156   }
1157   PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR));
1158   PetscCall(PetscFree(matvals));
1159 
1160   /* write block size option to the viewer's .info file */
1161   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1162   PetscFunctionReturn(0);
1163 }
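/*
   Resulting stream layout (as written above), matching the standard PETSc binary
   matrix format:
     header[4]    {MAT_FILE_CLASSID, M, N, global nz}            PETSC_INT
     rowlens[M]   nonzeros per global point row                  PETSC_INT
     colidxs[nz]  column indices, ascending within each row      PETSC_INT
     matvals[nz]  values in the same order                       PETSC_SCALAR
   The blocked storage is flattened to point (AIJ-style) rows, with the B entries
   split around the diagonal block so that columns stay ascending.
*/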
1164 
1165 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1166 {
1167   PetscBool      iascii,isdraw,issocket,isbinary;
1168 
1169   PetscFunctionBegin;
1170   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1171   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1172   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1173   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1174   if (iascii || isdraw || issocket) {
1175     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer));
1176   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat,viewer));
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1181 {
1182   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1183 
1184   PetscFunctionBegin;
1185 #if defined(PETSC_USE_LOG)
1186   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1187 #endif
1188   PetscCall(MatStashDestroy_Private(&mat->stash));
1189   PetscCall(MatStashDestroy_Private(&mat->bstash));
1190   PetscCall(MatDestroy(&baij->A));
1191   PetscCall(MatDestroy(&baij->B));
1192 #if defined(PETSC_USE_CTABLE)
1193   PetscCall(PetscTableDestroy(&baij->colmap));
1194 #else
1195   PetscCall(PetscFree(baij->colmap));
1196 #endif
1197   PetscCall(PetscFree(baij->garray));
1198   PetscCall(VecDestroy(&baij->lvec));
1199   PetscCall(VecScatterDestroy(&baij->Mvctx));
1200   PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
1201   PetscCall(PetscFree(baij->barray));
1202   PetscCall(PetscFree2(baij->hd,baij->ht));
1203   PetscCall(PetscFree(baij->rangebs));
1204   PetscCall(PetscFree(mat->data));
1205 
1206   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1207   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1208   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1209   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL));
1210   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL));
1211   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
1212   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL));
1213   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL));
1214   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiadj_C",NULL));
1215   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiaij_C",NULL));
1216 #if defined(PETSC_HAVE_HYPRE)
1217   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL));
1218 #endif
1219   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL));
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1224 {
1225   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1226   PetscInt       nt;
1227 
1228   PetscFunctionBegin;
1229   PetscCall(VecGetLocalSize(xx,&nt));
1230   PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1231   PetscCall(VecGetLocalSize(yy,&nt));
1232   PetscCheck(nt == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1233   PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1234   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
1235   PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1236   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
1237   PetscFunctionReturn(0);
1238 }
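/*
   The multiply above overlaps communication with computation. With A_d = a->A
   (diagonal block) and A_o = a->B (off-diagonal block),

     y = A_d x_local + A_o x_ghost

   The scatter filling a->lvec with the ghost values x_ghost is started first, the
   purely local product A_d x_local proceeds while the messages are in flight, and
   the off-diagonal contribution is added once the scatter has completed.
*/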
1239 
1240 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1241 {
1242   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1243 
1244   PetscFunctionBegin;
1245   PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1246   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
1247   PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1248   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1253 {
1254   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1255 
1256   PetscFunctionBegin;
1257   /* do nondiagonal part */
1258   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1259   /* do local part */
1260   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
1261   /* add partial results together */
1262   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1263   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1268 {
1269   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1270 
1271   PetscFunctionBegin;
1272   /* do nondiagonal part */
1273   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1274   /* do local part */
1275   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1276   /* add partial results together */
1277   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1278   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 /*
1283   This only works correctly for square matrices where the subblock A->A is the
1284    diagonal block
1285 */
1286 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1287 {
1288   PetscFunctionBegin;
1289   PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1290   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ*)A->data)->A,v));
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1295 {
1296   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1297 
1298   PetscFunctionBegin;
1299   PetscCall(MatScale(a->A,aa));
1300   PetscCall(MatScale(a->B,aa));
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1305 {
1306   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1307   PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1308   PetscInt    bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1309   PetscInt    nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1310   PetscInt    *cmap,*idx_p,cstart = mat->cstartbs;
1311 
1312   PetscFunctionBegin;
1313   PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1314   PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1315   mat->getrowactive = PETSC_TRUE;
1316 
1317   if (!mat->rowvalues && (idx || v)) {
1318     /*
1319         allocate enough space to hold information from the longest row.
1320     */
1321     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1322     PetscInt    max = 1,mbs = mat->mbs,tmp;
1323     for (i=0; i<mbs; i++) {
1324       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1325       if (max < tmp) max = tmp;
1326     }
1327     PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices));
1328   }
1329   lrow = row - brstart;
1330 
1331   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1332   if (!v)   {pvA = NULL; pvB = NULL;}
1333   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1334   PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
1335   PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
1336   nztot = nzA + nzB;
1337 
1338   cmap = mat->garray;
1339   if (v  || idx) {
1340     if (nztot) {
1341       /* Sort by increasing column numbers, assuming A and B already sorted */
1342       PetscInt imark = -1;
1343       if (v) {
1344         *v = v_p = mat->rowvalues;
1345         for (i=0; i<nzB; i++) {
1346           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1347           else break;
1348         }
1349         imark = i;
1350         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1351         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1352       }
1353       if (idx) {
1354         *idx = idx_p = mat->rowindices;
1355         if (imark > -1) {
1356           for (i=0; i<imark; i++) {
1357             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1358           }
1359         } else {
1360           for (i=0; i<nzB; i++) {
1361             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1362             else break;
1363           }
1364           imark = i;
1365         }
1366         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1368       }
1369     } else {
1370       if (idx) *idx = NULL;
1371       if (v)   *v   = NULL;
1372     }
1373   }
1374   *nz  = nztot;
1375   PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
1376   PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1381 {
1382   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1383 
1384   PetscFunctionBegin;
1385   PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1386   baij->getrowactive = PETSC_FALSE;
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1391 {
1392   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1393 
1394   PetscFunctionBegin;
1395   PetscCall(MatZeroEntries(l->A));
1396   PetscCall(MatZeroEntries(l->B));
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1401 {
1402   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1403   Mat            A  = a->A,B = a->B;
1404   PetscLogDouble isend[5],irecv[5];
1405 
1406   PetscFunctionBegin;
1407   info->block_size = (PetscReal)matin->rmap->bs;
1408 
1409   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1410 
1411   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1412   isend[3] = info->memory;  isend[4] = info->mallocs;
1413 
1414   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1415 
1416   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1417   isend[3] += info->memory;  isend[4] += info->mallocs;
1418 
1419   if (flag == MAT_LOCAL) {
1420     info->nz_used      = isend[0];
1421     info->nz_allocated = isend[1];
1422     info->nz_unneeded  = isend[2];
1423     info->memory       = isend[3];
1424     info->mallocs      = isend[4];
1425   } else if (flag == MAT_GLOBAL_MAX) {
1426     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1427 
1428     info->nz_used      = irecv[0];
1429     info->nz_allocated = irecv[1];
1430     info->nz_unneeded  = irecv[2];
1431     info->memory       = irecv[3];
1432     info->mallocs      = irecv[4];
1433   } else if (flag == MAT_GLOBAL_SUM) {
1434     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1435 
1436     info->nz_used      = irecv[0];
1437     info->nz_allocated = irecv[1];
1438     info->nz_unneeded  = irecv[2];
1439     info->memory       = irecv[3];
1440     info->mallocs      = irecv[4];
1441   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1442   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1443   info->fill_ratio_needed = 0;
1444   info->factor_mallocs    = 0;
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1449 {
1450   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1451 
1452   PetscFunctionBegin;
1453   switch (op) {
1454   case MAT_NEW_NONZERO_LOCATIONS:
1455   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1456   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1457   case MAT_KEEP_NONZERO_PATTERN:
1458   case MAT_NEW_NONZERO_LOCATION_ERR:
1459     MatCheckPreallocated(A,1);
1460     PetscCall(MatSetOption(a->A,op,flg));
1461     PetscCall(MatSetOption(a->B,op,flg));
1462     break;
1463   case MAT_ROW_ORIENTED:
1464     MatCheckPreallocated(A,1);
1465     a->roworiented = flg;
1466 
1467     PetscCall(MatSetOption(a->A,op,flg));
1468     PetscCall(MatSetOption(a->B,op,flg));
1469     break;
1470   case MAT_FORCE_DIAGONAL_ENTRIES:
1471   case MAT_SORTED_FULL:
1472     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1473     break;
1474   case MAT_IGNORE_OFF_PROC_ENTRIES:
1475     a->donotstash = flg;
1476     break;
1477   case MAT_USE_HASH_TABLE:
1478     a->ht_flag = flg;
1479     a->ht_fact = 1.39;
1480     break;
1481   case MAT_SYMMETRIC:
1482   case MAT_STRUCTURALLY_SYMMETRIC:
1483   case MAT_HERMITIAN:
1484   case MAT_SUBMAT_SINGLEIS:
1485   case MAT_SYMMETRY_ETERNAL:
1486     MatCheckPreallocated(A,1);
1487     PetscCall(MatSetOption(a->A,op,flg));
1488     break;
1489   default:
    SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unknown option %d",(int)op);
1491   }
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1496 {
1497   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1498   Mat_SeqBAIJ    *Aloc;
1499   Mat            B;
1500   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1501   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1502   MatScalar      *a;
1503 
1504   PetscFunctionBegin;
1505   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1506     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
1507     PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
1508     PetscCall(MatSetType(B,((PetscObject)A)->type_name));
1509     /* Do not know preallocation information, but must set block size */
1510     PetscCall(MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL));
1511   } else {
1512     B = *matout;
1513   }
1514 
1515   /* copy over the A part */
1516   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1517   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1518   PetscCall(PetscMalloc1(bs,&rvals));
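  /* rvals[] lists the bs global rows of block row i of A; in the transpose these become the
     column indices at which each block is inserted */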
1519 
1520   for (i=0; i<mbs; i++) {
1521     rvals[0] = bs*(baij->rstartbs + i);
1522     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1523     for (j=ai[i]; j<ai[i+1]; j++) {
1524       col = (baij->cstartbs+aj[j])*bs;
1525       for (k=0; k<bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES));
        col++; a += bs;
1529       }
1530     }
1531   }
1532   /* copy over the B part */
1533   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1534   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1535   for (i=0; i<mbs; i++) {
1536     rvals[0] = bs*(baij->rstartbs + i);
1537     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1538     for (j=ai[i]; j<ai[i+1]; j++) {
1539       col = baij->garray[aj[j]]*bs;
1540       for (k=0; k<bs; k++) {
1541         PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES));
1542         col++;
1543         a += bs;
1544       }
1545     }
1546   }
1547   PetscCall(PetscFree(rvals));
1548   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
1549   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
1550 
1551   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1552   else {
1553     PetscCall(MatHeaderMerge(A,&B));
1554   }
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1559 {
1560   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1561   Mat            a     = baij->A,b = baij->B;
1562   PetscInt       s1,s2,s3;
1563 
1564   PetscFunctionBegin;
1565   PetscCall(MatGetLocalSize(mat,&s2,&s3));
1566   if (rr) {
1567     PetscCall(VecGetLocalSize(rr,&s1));
1568     PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1569     /* Overlap communication with computation. */
1570     PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1571   }
1572   if (ll) {
1573     PetscCall(VecGetLocalSize(ll,&s1));
1574     PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1575     PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
1576   }
  /* scale the diagonal block */
1578   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
1579 
1580   if (rr) {
1581     /* Do a scatter end and then right scale the off-diagonal block */
1582     PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1583     PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec));
1584   }
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1589 {
1590   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1591   PetscInt      *lrows;
1592   PetscInt       r, len;
1593   PetscBool      cong;
1594 
1595   PetscFunctionBegin;
1596   /* get locally owned rows */
1597   PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
1598   /* fix right hand side if needed */
1599   if (x && b) {
1600     const PetscScalar *xx;
1601     PetscScalar       *bb;
1602 
1603     PetscCall(VecGetArrayRead(x,&xx));
1604     PetscCall(VecGetArray(b,&bb));
1605     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1606     PetscCall(VecRestoreArrayRead(x,&xx));
1607     PetscCall(VecRestoreArray(b,&bb));
1608   }
1609 
  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory each time and slow things down with
     many mallocs.
  */
  /* must zero l->B before l->A because the (diag != 0.0) case below may put values into l->B */
1619   PetscCall(MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL));
1620   PetscCall(MatHasCongruentLayouts(A,&cong));
1621   if ((diag != 0.0) && cong) {
1622     PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL));
1623   } else if (diag != 0.0) {
1624     PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL));
1625     PetscCheck(!((Mat_SeqBAIJ*)l->A->data)->nonew,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1626        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1627     for (r = 0; r < len; ++r) {
1628       const PetscInt row = lrows[r] + A->rmap->rstart;
1629       PetscCall(MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES));
1630     }
1631     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
1632     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
1633   } else {
1634     PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL));
1635   }
1636   PetscCall(PetscFree(lrows));
1637 
1638   /* only change matrix nonzero state if pattern was allowed to be changed */
1639   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1640     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1641     PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
1642   }
1643   PetscFunctionReturn(0);
1644 }
1645 
1646 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1647 {
1648   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1649   PetscMPIInt       n = A->rmap->n,p = 0;
1650   PetscInt          i,j,k,r,len = 0,row,col,count;
1651   PetscInt          *lrows,*owners = A->rmap->range;
1652   PetscSFNode       *rrows;
1653   PetscSF           sf;
1654   const PetscScalar *xx;
1655   PetscScalar       *bb,*mask;
1656   Vec               xmask,lmask;
1657   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1658   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1659   PetscScalar       *aa;
1660 
1661   PetscFunctionBegin;
1662   /* Create SF where leaves are input rows and roots are owned rows */
1663   PetscCall(PetscMalloc1(n, &lrows));
1664   for (r = 0; r < n; ++r) lrows[r] = -1;
1665   PetscCall(PetscMalloc1(N, &rrows));
1666   for (r = 0; r < N; ++r) {
1667     const PetscInt idx   = rows[r];
1668     PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
1669     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1670       PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
1671     }
1672     rrows[r].rank  = p;
1673     rrows[r].index = rows[r] - owners[p];
1674   }
1675   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
1676   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1677   /* Collect flags for rows to be zeroed */
1678   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
1679   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
1680   PetscCall(PetscSFDestroy(&sf));
1681   /* Compress and put in row numbers */
1682   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1683   /* zero diagonal part of matrix */
1684   PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
1685   /* handle off diagonal part of matrix */
1686   PetscCall(MatCreateVecs(A,&xmask,NULL));
1687   PetscCall(VecDuplicate(l->lvec,&lmask));
1688   PetscCall(VecGetArray(xmask,&bb));
1689   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1690   PetscCall(VecRestoreArray(xmask,&bb));
1691   PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
1692   PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
1693   PetscCall(VecDestroy(&xmask));
1694   if (x) {
1695     PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
1696     PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
1697     PetscCall(VecGetArrayRead(l->lvec,&xx));
1698     PetscCall(VecGetArray(b,&bb));
1699   }
1700   PetscCall(VecGetArray(lmask,&mask));
1701   /* remove zeroed rows of off diagonal matrix */
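  /* values are stored column-major within each bs x bs block, so starting at offset row%bs and
     striding by bs touches row row%bs of every block in this block row */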
1702   for (i = 0; i < len; ++i) {
1703     row   = lrows[i];
1704     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1705     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1706     for (k = 0; k < count; ++k) {
1707       aa[0] = 0.0;
1708       aa   += bs;
1709     }
1710   }
  /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1712   for (i = 0; i < l->B->rmap->N; ++i) {
1713     row = i/bs;
1714     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1715       for (k = 0; k < bs; ++k) {
1716         col = bs*baij->j[j] + k;
1717         if (PetscAbsScalar(mask[col])) {
1718           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1719           if (x) bb[i] -= aa[0]*xx[col];
1720           aa[0] = 0.0;
1721         }
1722       }
1723     }
1724   }
1725   if (x) {
1726     PetscCall(VecRestoreArray(b,&bb));
1727     PetscCall(VecRestoreArrayRead(l->lvec,&xx));
1728   }
1729   PetscCall(VecRestoreArray(lmask,&mask));
1730   PetscCall(VecDestroy(&lmask));
1731   PetscCall(PetscFree(lrows));
1732 
1733   /* only change matrix nonzero state if pattern was allowed to be changed */
1734   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1735     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1736     PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
1737   }
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1742 {
1743   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1744 
1745   PetscFunctionBegin;
1746   PetscCall(MatSetUnfactored(a->A));
1747   PetscFunctionReturn(0);
1748 }
1749 
1750 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1751 
1752 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1753 {
1754   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1755   Mat            a,b,c,d;
1756   PetscBool      flg;
1757 
1758   PetscFunctionBegin;
1759   a = matA->A; b = matA->B;
1760   c = matB->A; d = matB->B;
1761 
1762   PetscCall(MatEqual(a,c,&flg));
1763   if (flg) {
1764     PetscCall(MatEqual(b,d,&flg));
1765   }
1766   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1771 {
1772   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1773   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1774 
1775   PetscFunctionBegin;
1776   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1777   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1778     PetscCall(MatCopy_Basic(A,B,str));
1779   } else {
1780     PetscCall(MatCopy(a->A,b->A,str));
1781     PetscCall(MatCopy(a->B,b->B,str));
1782   }
1783   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1788 {
1789   PetscFunctionBegin;
1790   PetscCall(MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1795 {
1796   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1797   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1798   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1799 
1800   PetscFunctionBegin;
1801   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
1802   PetscFunctionReturn(0);
1803 }
1804 
1805 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1806 {
1807   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1808   PetscBLASInt   bnz,one=1;
1809   Mat_SeqBAIJ    *x,*y;
1810   PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;
1811 
1812   PetscFunctionBegin;
1813   if (str == SAME_NONZERO_PATTERN) {
1814     PetscScalar alpha = a;
1815     x    = (Mat_SeqBAIJ*)xx->A->data;
1816     y    = (Mat_SeqBAIJ*)yy->A->data;
1817     PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
1818     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1819     x    = (Mat_SeqBAIJ*)xx->B->data;
1820     y    = (Mat_SeqBAIJ*)yy->B->data;
1821     PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
1822     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1823     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1824   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1825     PetscCall(MatAXPY_Basic(Y,a,X,str));
1826   } else {
1827     Mat      B;
1828     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1829     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
1830     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
1831     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
1832     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
1833     PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
1834     PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
1835     PetscCall(MatSetType(B,MATMPIBAIJ));
1836     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d));
1837     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
1838     PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o));
1839     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1840     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
1841     PetscCall(MatHeaderMerge(Y,&B));
1842     PetscCall(PetscFree(nnz_d));
1843     PetscCall(PetscFree(nnz_o));
1844   }
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1849 {
1850   PetscFunctionBegin;
1851   if (PetscDefined(USE_COMPLEX)) {
1852     Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data;
1853 
1854     PetscCall(MatConjugate_SeqBAIJ(a->A));
1855     PetscCall(MatConjugate_SeqBAIJ(a->B));
1856   }
1857   PetscFunctionReturn(0);
1858 }
1859 
1860 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1861 {
1862   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1863 
1864   PetscFunctionBegin;
1865   PetscCall(MatRealPart(a->A));
1866   PetscCall(MatRealPart(a->B));
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1871 {
1872   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1873 
1874   PetscFunctionBegin;
1875   PetscCall(MatImaginaryPart(a->A));
1876   PetscCall(MatImaginaryPart(a->B));
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1881 {
1882   IS             iscol_local;
1883   PetscInt       csize;
1884 
1885   PetscFunctionBegin;
1886   PetscCall(ISGetLocalSize(iscol,&csize));
1887   if (call == MAT_REUSE_MATRIX) {
1888     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
1889     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1890   } else {
1891     PetscCall(ISAllGather(iscol,&iscol_local));
1892   }
1893   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat));
1894   if (call == MAT_INITIAL_MATRIX) {
1895     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
1896     PetscCall(ISDestroy(&iscol_local));
1897   }
1898   PetscFunctionReturn(0);
1899 }
1900 
/*
  Not great since it makes two copies of the submatrix: first a SeqBAIJ
  on each process, and then the end result by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for both BAIJ and SBAIJ matrices (an unfortunate dependency).
*/
1907 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1908 {
1909   PetscMPIInt    rank,size;
1910   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1911   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1912   Mat            M,Mreuse;
1913   MatScalar      *vwork,*aa;
1914   MPI_Comm       comm;
1915   IS             isrow_new, iscol_new;
1916   Mat_SeqBAIJ    *aij;
1917 
1918   PetscFunctionBegin;
1919   PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
1920   PetscCallMPI(MPI_Comm_rank(comm,&rank));
1921   PetscCallMPI(MPI_Comm_size(comm,&size));
  /* The compression and expansion should be avoided. They do not flag
     errors and may change the indices, hence they are bug-prone */
1924   PetscCall(ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new));
1925   PetscCall(ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new));
1926 
1927   if (call ==  MAT_REUSE_MATRIX) {
1928     PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
1929     PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1930     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse));
1931   } else {
1932     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse));
1933   }
1934   PetscCall(ISDestroy(&isrow_new));
1935   PetscCall(ISDestroy(&iscol_new));
1936   /*
1937       m - number of local rows
1938       n - number of columns (same on all processors)
1939       rstart - first row in new global matrix generated
1940   */
1941   PetscCall(MatGetBlockSize(mat,&bs));
1942   PetscCall(MatGetSize(Mreuse,&m,&n));
1943   m    = m/bs;
1944   n    = n/bs;
1945 
1946   if (call == MAT_INITIAL_MATRIX) {
1947     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1948     ii  = aij->i;
1949     jj  = aij->j;
1950 
1951     /*
1952         Determine the number of non-zeros in the diagonal and off-diagonal
1953         portions of the matrix in order to do correct preallocation
1954     */
1955 
1956     /* first get start and end of "diagonal" columns */
1957     if (csize == PETSC_DECIDE) {
1958       PetscCall(ISGetSize(isrow,&mglobal));
1959       if (mglobal == n*bs) { /* square matrix */
1960         nlocal = m;
1961       } else {
1962         nlocal = n/size + ((n % size) > rank);
1963       }
1964     } else {
1965       nlocal = csize/bs;
1966     }
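    /* an inclusive prefix sum of the local column counts gives the end of this process's "diagonal" column range */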
1967     PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
1968     rstart = rend - nlocal;
1969     PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);
1970 
1971     /* next, compute all the lengths */
1972     PetscCall(PetscMalloc2(m+1,&dlens,m+1,&olens));
1973     for (i=0; i<m; i++) {
1974       jend = ii[i+1] - ii[i];
1975       olen = 0;
1976       dlen = 0;
1977       for (j=0; j<jend; j++) {
1978         if (*jj < rstart || *jj >= rend) olen++;
1979         else dlen++;
1980         jj++;
1981       }
1982       olens[i] = olen;
1983       dlens[i] = dlen;
1984     }
1985     PetscCall(MatCreate(comm,&M));
1986     PetscCall(MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n));
1987     PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
1988     PetscCall(MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens));
1989     PetscCall(MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens));
1990     PetscCall(PetscFree2(dlens,olens));
1991   } else {
1992     PetscInt ml,nl;
1993 
1994     M    = *newmat;
1995     PetscCall(MatGetLocalSize(M,&ml,&nl));
1996     PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1997     PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
2002     M->was_assembled = PETSC_TRUE;
2003     M->assembled     = PETSC_FALSE;
2004   }
2005   PetscCall(MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE));
2006   PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
2007   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2008   ii   = aij->i;
2009   jj   = aij->j;
2010   aa   = aij->a;
2011   for (i=0; i<m; i++) {
2012     row   = rstart/bs + i;
2013     nz    = ii[i+1] - ii[i];
2014     cwork = jj;     jj += nz;
2015     vwork = aa;     aa += nz*bs*bs;
2016     PetscCall(MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
2017   }
2018 
2019   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
2020   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
2021   *newmat = M;
2022 
2023   /* save submatrix used in processor for next request */
2024   if (call ==  MAT_INITIAL_MATRIX) {
2025     PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
2026     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2027   }
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2032 {
2033   MPI_Comm       comm,pcomm;
2034   PetscInt       clocal_size,nrows;
2035   const PetscInt *rows;
2036   PetscMPIInt    size;
2037   IS             crowp,lcolp;
2038 
2039   PetscFunctionBegin;
2040   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2041   /* make a collective version of 'rowp' */
2042   PetscCall(PetscObjectGetComm((PetscObject)rowp,&pcomm));
2043   if (pcomm==comm) {
2044     crowp = rowp;
2045   } else {
2046     PetscCall(ISGetSize(rowp,&nrows));
2047     PetscCall(ISGetIndices(rowp,&rows));
2048     PetscCall(ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp));
2049     PetscCall(ISRestoreIndices(rowp,&rows));
2050   }
2051   PetscCall(ISSetPermutation(crowp));
2052   /* make a local version of 'colp' */
2053   PetscCall(PetscObjectGetComm((PetscObject)colp,&pcomm));
2054   PetscCallMPI(MPI_Comm_size(pcomm,&size));
2055   if (size==1) {
2056     lcolp = colp;
2057   } else {
2058     PetscCall(ISAllGather(colp,&lcolp));
2059   }
2060   PetscCall(ISSetPermutation(lcolp));
2061   /* now we just get the submatrix */
2062   PetscCall(MatGetLocalSize(A,NULL,&clocal_size));
2063   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B));
2064   /* clean up */
2065   if (pcomm!=comm) {
2066     PetscCall(ISDestroy(&crowp));
2067   }
2068   if (size>1) {
2069     PetscCall(ISDestroy(&lcolp));
2070   }
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2075 {
2076   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2077   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2078 
2079   PetscFunctionBegin;
2080   if (nghosts) *nghosts = B->nbs;
2081   if (ghosts) *ghosts = baij->garray;
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2086 {
2087   Mat            B;
2088   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2089   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2090   Mat_SeqAIJ     *b;
2091   PetscMPIInt    size,rank,*recvcounts = NULL,*displs = NULL;
2092   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2093   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2094 
2095   PetscFunctionBegin;
2096   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
2097   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
2098 
2099   /* ----------------------------------------------------------------
2100      Tell every processor the number of nonzeros per row
2101   */
2102   PetscCall(PetscMalloc1(A->rmap->N/bs,&lens));
2103   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2104     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2105   }
2106   PetscCall(PetscMalloc1(2*size,&recvcounts));
2107   displs    = recvcounts + size;
2108   for (i=0; i<size; i++) {
2109     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2110     displs[i]     = A->rmap->range[i]/bs;
2111   }
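  /* with MPI_IN_PLACE each process already holds its own segment of lens[]; the allgather fills in the rest */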
2112   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A)));
2113   /* ---------------------------------------------------------------
2114      Create the sequential matrix of the same type as the local block diagonal
2115   */
2116   PetscCall(MatCreate(PETSC_COMM_SELF,&B));
2117   PetscCall(MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE));
2118   PetscCall(MatSetType(B,MATSEQAIJ));
2119   PetscCall(MatSeqAIJSetPreallocation(B,0,lens));
2120   b    = (Mat_SeqAIJ*)B->data;
2121 
2122   /*--------------------------------------------------------------------
2123     Copy my part of matrix column indices over
2124   */
2125   sendcount  = ad->nz + bd->nz;
2126   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2127   a_jsendbuf = ad->j;
2128   b_jsendbuf = bd->j;
2129   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2130   cnt        = 0;
2131   for (i=0; i<n; i++) {
2132 
2133     /* put in lower diagonal portion */
2134     m = bd->i[i+1] - bd->i[i];
2135     while (m > 0) {
2136       /* is it above diagonal (in bd (compressed) numbering) */
2137       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2138       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2139       m--;
2140     }
2141 
2142     /* put in diagonal portion */
2143     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2144       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2145     }
2146 
2147     /* put in upper diagonal portion */
2148     while (m-- > 0) {
2149       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2150     }
2151   }
2152   PetscCheck(cnt == sendcount,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT,sendcount,cnt);
2153 
2154   /*--------------------------------------------------------------------
2155     Gather all column indices to all processors
2156   */
2157   for (i=0; i<size; i++) {
2158     recvcounts[i] = 0;
2159     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2160       recvcounts[i] += lens[j];
2161     }
2162   }
2163   displs[0] = 0;
2164   for (i=1; i<size; i++) {
2165     displs[i] = displs[i-1] + recvcounts[i-1];
2166   }
2167   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A)));
  /*--------------------------------------------------------------------
    Assemble the matrix into usable form (note numerical values not yet set)
  */
2171   /* set the b->ilen (length of each row) values */
2172   PetscCall(PetscArraycpy(b->ilen,lens,A->rmap->N/bs));
2173   /* set the b->i indices */
2174   b->i[0] = 0;
2175   for (i=1; i<=A->rmap->N/bs; i++) {
2176     b->i[i] = b->i[i-1] + lens[i-1];
2177   }
2178   PetscCall(PetscFree(lens));
2179   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
2180   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
2181   PetscCall(PetscFree(recvcounts));
2182 
2183   if (A->symmetric) PetscCall(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE));
2184   else if (A->hermitian) PetscCall(MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE));
2185   else if (A->structurally_symmetric) PetscCall(MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE));
2186   *newmat = B;
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2191 {
2192   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2193   Vec            bb1 = NULL;
2194 
2195   PetscFunctionBegin;
2196   if (flag == SOR_APPLY_UPPER) {
2197     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2198     PetscFunctionReturn(0);
2199   }
2200 
  if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
2202     PetscCall(VecDuplicate(bb,&bb1));
2203   }
2204 
2205   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2206     if (flag & SOR_ZERO_INITIAL_GUESS) {
2207       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2208       its--;
2209     }
2210 
2211     while (its--) {
2212       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
2213       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
2214 
2215       /* update rhs: bb1 = bb - B*x */
2216       PetscCall(VecScale(mat->lvec,-1.0));
2217       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
2218 
2219       /* local sweep */
2220       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
2221     }
2222   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2223     if (flag & SOR_ZERO_INITIAL_GUESS) {
2224       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2225       its--;
2226     }
2227     while (its--) {
2228       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
2229       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
2230 
2231       /* update rhs: bb1 = bb - B*x */
2232       PetscCall(VecScale(mat->lvec,-1.0));
2233       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
2234 
2235       /* local sweep */
2236       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
2237     }
2238   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2239     if (flag & SOR_ZERO_INITIAL_GUESS) {
2240       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2241       its--;
2242     }
2243     while (its--) {
2244       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
2245       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
2246 
2247       /* update rhs: bb1 = bb - B*x */
2248       PetscCall(VecScale(mat->lvec,-1.0));
2249       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
2250 
2251       /* local sweep */
2252       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
2253     }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel SOR version not supported");
2255 
2256   PetscCall(VecDestroy(&bb1));
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions)
2261 {
2262   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2263   PetscInt       m,N,i,*garray = aij->garray;
2264   PetscInt       ib,jb,bs = A->rmap->bs;
2265   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2266   MatScalar      *a_val = a_aij->a;
2267   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2268   MatScalar      *b_val = b_aij->a;
2269   PetscReal      *work;
2270 
2271   PetscFunctionBegin;
2272   PetscCall(MatGetSize(A,&m,&N));
2273   PetscCall(PetscCalloc1(N,&work));
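  /* work[] accumulates the per-column reduction over the local diagonal (A) and off-diagonal (B)
     parts; the contributions are then combined across processes below */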
2274   if (type == NORM_2) {
2275     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2276       for (jb=0; jb<bs; jb++) {
2277         for (ib=0; ib<bs; ib++) {
2278           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2279           a_val++;
2280         }
2281       }
2282     }
2283     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2284       for (jb=0; jb<bs; jb++) {
2285         for (ib=0; ib<bs; ib++) {
2286           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2287           b_val++;
2288         }
2289       }
2290     }
2291   } else if (type == NORM_1) {
2292     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2293       for (jb=0; jb<bs; jb++) {
2294         for (ib=0; ib<bs; ib++) {
2295           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2296           a_val++;
2297         }
2298       }
2299     }
2300     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2301       for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
2303           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2304           b_val++;
2305         }
2306       }
2307     }
2308   } else if (type == NORM_INFINITY) {
2309     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2310       for (jb=0; jb<bs; jb++) {
2311         for (ib=0; ib<bs; ib++) {
          PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2313           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2314           a_val++;
2315         }
2316       }
2317     }
2318     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2319       for (jb=0; jb<bs; jb++) {
2320         for (ib=0; ib<bs; ib++) {
          PetscInt col = garray[b_aij->j[i]] * bs + jb;
2322           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2323           b_val++;
2324         }
2325       }
2326     }
2327   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2328     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2329       for (jb=0; jb<bs; jb++) {
2330         for (ib=0; ib<bs; ib++) {
2331           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2332           a_val++;
2333         }
2334       }
2335     }
2336     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2337       for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
2339           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2340           b_val++;
2341         }
2342       }
2343     }
2344   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2345     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2346       for (jb=0; jb<bs; jb++) {
2347         for (ib=0; ib<bs; ib++) {
2348           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2349           a_val++;
2350         }
2351       }
2352     }
2353     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2354       for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
2356           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2357           b_val++;
2358         }
2359       }
2360     }
2361   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
2362   if (type == NORM_INFINITY) {
2363     PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
2364   } else {
2365     PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
2366   }
2367   PetscCall(PetscFree(work));
2368   if (type == NORM_2) {
2369     for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2370   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2371     for (i=0; i<N; i++) reductions[i] /= m;
2372   }
2373   PetscFunctionReturn(0);
2374 }
2375 
2376 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2377 {
2378   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2379 
2380   PetscFunctionBegin;
2381   PetscCall(MatInvertBlockDiagonal(a->A,values));
2382   A->factorerrortype             = a->A->factorerrortype;
2383   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2384   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2389 {
2390   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2391   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2392 
2393   PetscFunctionBegin;
2394   if (!Y->preallocated) {
2395     PetscCall(MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL));
2396   } else if (!aij->nz) {
2397     PetscInt nonew = aij->nonew;
2398     PetscCall(MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL));
2399     aij->nonew = nonew;
2400   }
2401   PetscCall(MatShift_Basic(Y,a));
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2406 {
2407   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2408 
2409   PetscFunctionBegin;
2410   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2411   PetscCall(MatMissingDiagonal(a->A,missing,d));
2412   if (d) {
2413     PetscInt rstart;
2414     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
    *d += rstart/A->rmap->bs;
  }
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2422 {
2423   PetscFunctionBegin;
2424   *a = ((Mat_MPIBAIJ*)A->data)->A;
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 /* -------------------------------------------------------------------*/
2429 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2430                                        MatGetRow_MPIBAIJ,
2431                                        MatRestoreRow_MPIBAIJ,
2432                                        MatMult_MPIBAIJ,
2433                                 /* 4*/ MatMultAdd_MPIBAIJ,
2434                                        MatMultTranspose_MPIBAIJ,
2435                                        MatMultTransposeAdd_MPIBAIJ,
2436                                        NULL,
2437                                        NULL,
2438                                        NULL,
2439                                 /*10*/ NULL,
2440                                        NULL,
2441                                        NULL,
2442                                        MatSOR_MPIBAIJ,
2443                                        MatTranspose_MPIBAIJ,
2444                                 /*15*/ MatGetInfo_MPIBAIJ,
2445                                        MatEqual_MPIBAIJ,
2446                                        MatGetDiagonal_MPIBAIJ,
2447                                        MatDiagonalScale_MPIBAIJ,
2448                                        MatNorm_MPIBAIJ,
2449                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2450                                        MatAssemblyEnd_MPIBAIJ,
2451                                        MatSetOption_MPIBAIJ,
2452                                        MatZeroEntries_MPIBAIJ,
2453                                 /*24*/ MatZeroRows_MPIBAIJ,
2454                                        NULL,
2455                                        NULL,
2456                                        NULL,
2457                                        NULL,
2458                                 /*29*/ MatSetUp_MPIBAIJ,
2459                                        NULL,
2460                                        NULL,
2461                                        MatGetDiagonalBlock_MPIBAIJ,
2462                                        NULL,
2463                                 /*34*/ MatDuplicate_MPIBAIJ,
2464                                        NULL,
2465                                        NULL,
2466                                        NULL,
2467                                        NULL,
2468                                 /*39*/ MatAXPY_MPIBAIJ,
2469                                        MatCreateSubMatrices_MPIBAIJ,
2470                                        MatIncreaseOverlap_MPIBAIJ,
2471                                        MatGetValues_MPIBAIJ,
2472                                        MatCopy_MPIBAIJ,
2473                                 /*44*/ NULL,
2474                                        MatScale_MPIBAIJ,
2475                                        MatShift_MPIBAIJ,
2476                                        NULL,
2477                                        MatZeroRowsColumns_MPIBAIJ,
2478                                 /*49*/ NULL,
2479                                        NULL,
2480                                        NULL,
2481                                        NULL,
2482                                        NULL,
2483                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2484                                        NULL,
2485                                        MatSetUnfactored_MPIBAIJ,
2486                                        MatPermute_MPIBAIJ,
2487                                        MatSetValuesBlocked_MPIBAIJ,
2488                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2489                                        MatDestroy_MPIBAIJ,
2490                                        MatView_MPIBAIJ,
2491                                        NULL,
2492                                        NULL,
2493                                 /*64*/ NULL,
2494                                        NULL,
2495                                        NULL,
2496                                        NULL,
2497                                        NULL,
2498                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2499                                        NULL,
2500                                        NULL,
2501                                        NULL,
2502                                        NULL,
2503                                 /*74*/ NULL,
2504                                        MatFDColoringApply_BAIJ,
2505                                        NULL,
2506                                        NULL,
2507                                        NULL,
2508                                 /*79*/ NULL,
2509                                        NULL,
2510                                        NULL,
2511                                        NULL,
2512                                        MatLoad_MPIBAIJ,
2513                                 /*84*/ NULL,
2514                                        NULL,
2515                                        NULL,
2516                                        NULL,
2517                                        NULL,
2518                                 /*89*/ NULL,
2519                                        NULL,
2520                                        NULL,
2521                                        NULL,
2522                                        NULL,
2523                                 /*94*/ NULL,
2524                                        NULL,
2525                                        NULL,
2526                                        NULL,
2527                                        NULL,
2528                                 /*99*/ NULL,
2529                                        NULL,
2530                                        NULL,
2531                                        MatConjugate_MPIBAIJ,
2532                                        NULL,
2533                                 /*104*/NULL,
2534                                        MatRealPart_MPIBAIJ,
2535                                        MatImaginaryPart_MPIBAIJ,
2536                                        NULL,
2537                                        NULL,
2538                                 /*109*/NULL,
2539                                        NULL,
2540                                        NULL,
2541                                        NULL,
2542                                        MatMissingDiagonal_MPIBAIJ,
2543                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2544                                        NULL,
2545                                        MatGetGhosts_MPIBAIJ,
2546                                        NULL,
2547                                        NULL,
2548                                 /*119*/NULL,
2549                                        NULL,
2550                                        NULL,
2551                                        NULL,
2552                                        MatGetMultiProcBlock_MPIBAIJ,
2553                                 /*124*/NULL,
2554                                        MatGetColumnReductions_MPIBAIJ,
2555                                        MatInvertBlockDiagonal_MPIBAIJ,
2556                                        NULL,
2557                                        NULL,
2558                                /*129*/ NULL,
2559                                        NULL,
2560                                        NULL,
2561                                        NULL,
2562                                        NULL,
2563                                /*134*/ NULL,
2564                                        NULL,
2565                                        NULL,
2566                                        NULL,
2567                                        NULL,
2568                                /*139*/ MatSetBlockSizes_Default,
2569                                        NULL,
2570                                        NULL,
2571                                        MatFDColoringSetUp_MPIXAIJ,
2572                                        NULL,
2573                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2574                                        NULL,
2575                                        NULL,
2576                                        NULL
2577 };
2578 
2579 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2580 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2581 
2582 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2583 {
2584   PetscInt       m,rstart,cstart,cend;
2585   PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
2586   const PetscInt *JJ    =NULL;
2587   PetscScalar    *values=NULL;
2588   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2589   PetscBool      nooffprocentries;
2590 
2591   PetscFunctionBegin;
2592   PetscCall(PetscLayoutSetBlockSize(B->rmap,bs));
2593   PetscCall(PetscLayoutSetBlockSize(B->cmap,bs));
2594   PetscCall(PetscLayoutSetUp(B->rmap));
2595   PetscCall(PetscLayoutSetUp(B->cmap));
2596   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
2597   m      = B->rmap->n/bs;
2598   rstart = B->rmap->rstart/bs;
2599   cstart = B->cmap->rstart/bs;
2600   cend   = B->cmap->rend/bs;
2601 
2602   PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
2603   PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz));
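  /* count, for each local block row, how many block columns fall inside the diagonal range
     [cstart,cend) and how many fall outside, so the d and o parts can be preallocated exactly */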
2604   for (i=0; i<m; i++) {
2605     nz = ii[i+1] - ii[i];
2606     PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
2607     nz_max = PetscMax(nz_max,nz);
2608     dlen   = 0;
2609     olen   = 0;
2610     JJ     = jj + ii[i];
2611     for (j=0; j<nz; j++) {
2612       if (*JJ < cstart || *JJ >= cend) olen++;
2613       else dlen++;
2614       JJ++;
2615     }
2616     d_nnz[i] = dlen;
2617     o_nnz[i] = olen;
2618   }
2619   PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz));
2620   PetscCall(PetscFree2(d_nnz,o_nnz));
2621 
2622   values = (PetscScalar*)V;
2623   if (!values) {
2624     PetscCall(PetscCalloc1(bs*bs*nz_max,&values));
2625   }
2626   for (i=0; i<m; i++) {
2627     PetscInt          row    = i + rstart;
2628     PetscInt          ncols  = ii[i+1] - ii[i];
2629     const PetscInt    *icols = jj + ii[i];
2630     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2631       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2632       PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES));
2633     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2634       PetscInt j;
2635       for (j=0; j<ncols; j++) {
2636         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2637         PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES));
2638       }
2639     }
2640   }
2641 
2642   if (!V) PetscCall(PetscFree(values));
2643   nooffprocentries    = B->nooffprocentries;
2644   B->nooffprocentries = PETSC_TRUE;
2645   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
2646   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
2647   B->nooffprocentries = nooffprocentries;
2648 
2649   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
2650   PetscFunctionReturn(0);
2651 }
2652 
2653 /*@C
2654    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2655 
2656    Collective
2657 
2658    Input Parameters:
2659 +  B - the matrix
2660 .  bs - the block size
2661 .  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
2663 -  v - optional values in the matrix
2664 
2665    Level: advanced
2666 
2667    Notes:
2668     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2669    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2670    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2671    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2672    block column and the second index is over columns within a block.
2673 
   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
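
   Example:
   A minimal sketch (hypothetical sizes, assuming an MPI communicator comm, block size 2, and one
   block row owned per process) that builds a block-diagonal matrix from block CSR arrays might
   look like

.vb
   PetscMPIInt rank;
   Mat         B;
   PetscInt    bs = 2, i[] = {0,1}, j[1];
   PetscScalar v[] = {1.0, 2.0, 3.0, 4.0}; /* one row-oriented 2x2 block */

   MPI_Comm_rank(comm,&rank);
   j[0] = rank; /* each process holds the diagonal block of its own block row */
   MatCreate(comm,&B);
   MatSetSizes(B,2,2,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPIBAIJ);
   MatMPIBAIJSetPreallocationCSR(B,bs,i,j,v);
.ve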
2675 
2676 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
2677 @*/
2678 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2679 {
2680   PetscFunctionBegin;
2681   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2682   PetscValidType(B,1);
2683   PetscValidLogicalCollectiveInt(B,bs,2);
2684   PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
2685   PetscFunctionReturn(0);
2686 }
2687 
2688 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2689 {
2690   Mat_MPIBAIJ    *b;
2691   PetscInt       i;
2692   PetscMPIInt    size;
2693 
2694   PetscFunctionBegin;
2695   PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
2696   PetscCall(PetscLayoutSetUp(B->rmap));
2697   PetscCall(PetscLayoutSetUp(B->cmap));
2698   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
2699 
2700   if (d_nnz) {
2701     for (i=0; i<B->rmap->n/bs; i++) {
2702       PetscCheck(d_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,d_nnz[i]);
2703     }
2704   }
2705   if (o_nnz) {
2706     for (i=0; i<B->rmap->n/bs; i++) {
2707       PetscCheck(o_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,o_nnz[i]);
2708     }
2709   }
2710 
2711   b      = (Mat_MPIBAIJ*)B->data;
2712   b->bs2 = bs*bs;
2713   b->mbs = B->rmap->n/bs;
2714   b->nbs = B->cmap->n/bs;
2715   b->Mbs = B->rmap->N/bs;
2716   b->Nbs = B->cmap->N/bs;
2717 
2718   for (i=0; i<=b->size; i++) {
2719     b->rangebs[i] = B->rmap->range[i]/bs;
2720   }
2721   b->rstartbs = B->rmap->rstart/bs;
2722   b->rendbs   = B->rmap->rend/bs;
2723   b->cstartbs = B->cmap->rstart/bs;
2724   b->cendbs   = B->cmap->rend/bs;
2725 
2726 #if defined(PETSC_USE_CTABLE)
2727   PetscCall(PetscTableDestroy(&b->colmap));
2728 #else
2729   PetscCall(PetscFree(b->colmap));
2730 #endif
2731   PetscCall(PetscFree(b->garray));
2732   PetscCall(VecDestroy(&b->lvec));
2733   PetscCall(VecScatterDestroy(&b->Mvctx));
2734 
2735   /* Because B will have been resized, we simply destroy it and create a new one each time */
2736   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
2737   PetscCall(MatDestroy(&b->B));
2738   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
2739   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
2740   PetscCall(MatSetType(b->B,MATSEQBAIJ));
2741   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
2742 
2743   if (!B->preallocated) {
2744     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
2745     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
2746     PetscCall(MatSetType(b->A,MATSEQBAIJ));
2747     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
2748     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash));
2749   }
2750 
2751   PetscCall(MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz));
2752   PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz));
2753   B->preallocated  = PETSC_TRUE;
2754   B->was_assembled = PETSC_FALSE;
2755   B->assembled     = PETSC_FALSE;
2756   PetscFunctionReturn(0);
2757 }
2758 
2759 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2760 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2761 
2762 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2763 {
2764   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2765   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2766   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2767   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2768 
2769   PetscFunctionBegin;
2770   PetscCall(PetscMalloc1(M+1,&ii));
2771   ii[0] = 0;
2772   for (i=0; i<M; i++) {
2773     PetscCheck((id[i+1] - id[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,id[i],id[i+1]);
2774     PetscCheck((io[i+1] - io[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,io[i],io[i+1]);
2775     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2776     /* remove one from the count if this row of the matrix has a diagonal entry */
2777     for (j=id[i]; j<id[i+1]; j++) {
2778       if (jd[j] == i) {ii[i+1]--;break;}
2779     }
2780   }
2781   PetscCall(PetscMalloc1(ii[M],&jj));
2782   cnt  = 0;
2783   for (i=0; i<M; i++) {
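2783     /* copy off-diagonal columns that come before this process's diagonal block,
            then local columns (skipping the diagonal entry, which the adjacency
            structure omits), then the remaining off-diagonal columns, so each
            row's column indices stay sorted */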
2784     for (j=io[i]; j<io[i+1]; j++) {
2785       if (garray[jo[j]] > rstart) break;
2786       jj[cnt++] = garray[jo[j]];
2787     }
2788     for (k=id[i]; k<id[i+1]; k++) {
2789       if (jd[k] != i) {
2790         jj[cnt++] = rstart + jd[k];
2791       }
2792     }
2793     for (; j<io[i+1]; j++) {
2794       jj[cnt++] = garray[jo[j]];
2795     }
2796   }
2797   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj));
2798   PetscFunctionReturn(0);
2799 }
2800 
2801 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2802 
2803 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2804 
2805 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2806 {
2807   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2808   Mat_MPIAIJ  *b;
2809   Mat         B;
2810 
2811   PetscFunctionBegin;
2812   PetscCheck(A->assembled,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2813 
2814   if (reuse == MAT_REUSE_MATRIX) {
2815     B = *newmat;
2816   } else {
2817     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
2818     PetscCall(MatSetType(B,MATMPIAIJ));
2819     PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2820     PetscCall(MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs));
2821     PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
2822     PetscCall(MatMPIAIJSetPreallocation(B,0,NULL,0,NULL));
2823   }
2824   b = (Mat_MPIAIJ*) B->data;
2825 
2826   if (reuse == MAT_REUSE_MATRIX) {
2827     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2828     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2829   } else {
2830     PetscCall(MatDestroy(&b->A));
2831     PetscCall(MatDestroy(&b->B));
2832     PetscCall(MatDisAssemble_MPIBAIJ(A));
2833     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2834     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2835     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
2836     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
2837   }
2838   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
2839   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
2840 
2841   if (reuse == MAT_INPLACE_MATRIX) {
2842     PetscCall(MatHeaderReplace(A,&B));
2843   } else {
2844    *newmat = B;
2845   }
2846   PetscFunctionReturn(0);
2847 }
2848 
2849 /*MC
2850    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2851 
2852    Options Database Keys:
2853 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2854 . -mat_block_size <bs> - set the blocksize used to store the matrix
2855 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2856 - -mat_use_hash_table <fact> - set hash table factor
2857 
2858    Level: beginner
2859 
2860    Notes:
2861     MatSetOption(mat,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
2862     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
2863 
2864 .seealso: `MatCreateBAIJ()`
2865 M*/
2866 
2867 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2868 
2869 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2870 {
2871   Mat_MPIBAIJ    *b;
2872   PetscBool      flg = PETSC_FALSE;
2873 
2874   PetscFunctionBegin;
2875   PetscCall(PetscNewLog(B,&b));
2876   B->data = (void*)b;
2877 
2878   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
2879   B->assembled = PETSC_FALSE;
2880 
2881   B->insertmode = NOT_SET_VALUES;
2882   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
2883   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size));
2884 
2885   /* build local table of row and column ownerships */
2886   PetscCall(PetscMalloc1(b->size+1,&b->rangebs));
2887 
2888   /* build cache for off array entries formed */
2889   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));
2890 
2891   b->donotstash  = PETSC_FALSE;
2892   b->colmap      = NULL;
2893   b->garray      = NULL;
2894   b->roworiented = PETSC_TRUE;
2895 
2896   /* stuff used in block assembly */
2897   b->barray = NULL;
2898 
2899   /* stuff used for matrix vector multiply */
2900   b->lvec  = NULL;
2901   b->Mvctx = NULL;
2902 
2903   /* stuff for MatGetRow() */
2904   b->rowindices   = NULL;
2905   b->rowvalues    = NULL;
2906   b->getrowactive = PETSC_FALSE;
2907 
2908   /* hash table stuff */
2909   b->ht           = NULL;
2910   b->hd           = NULL;
2911   b->ht_size      = 0;
2912   b->ht_flag      = PETSC_FALSE;
2913   b->ht_fact      = 0;
2914   b->ht_total_ct  = 0;
2915   b->ht_insert_ct = 0;
2916 
2917   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2918   b->ijonly = PETSC_FALSE;
2919 
2920   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj));
2921   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ));
2922   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ));
2923 #if defined(PETSC_HAVE_HYPRE)
2924   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE));
2925 #endif
2926   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ));
2927   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ));
2928   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ));
2929   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2930   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ));
2931   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ));
2932   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS));
2933   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ));
2934 
2935   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix","Mat");
2936   PetscCall(PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg));
2937   if (flg) {
2938     PetscReal fact = 1.39;
2939     PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE));
2940     PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL));
2941     if (fact <= 1.0) fact = 1.39;
2942     PetscCall(MatMPIBAIJSetHashTableFactor(B,fact));
2943     PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact));
2944   }
2945   PetscOptionsEnd();
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 /*MC
2950    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2951 
2952    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2953    and MATMPIBAIJ otherwise.
2954 
2955    Options Database Keys:
2956 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2957 
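   Example (a minimal creation sketch following the MatCreate()/MatSetType()/preallocation
   paradigm recommended elsewhere in this file; the sizes m, n, M, N, bs and the
   preallocation counts are placeholders):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATBAIJ);
   MatSeqBAIJSetPreallocation(A,bs,d_nz,d_nnz);             /* takes effect when comm has one process */
   MatMPIBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);  /* takes effect when comm is parallel     */
.ve
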
2958   Level: beginner
2959 
2960 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2961 M*/
2962 
2963 /*@C
2964    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2965    (block compressed row).  For good matrix assembly performance
2966    the user should preallocate the matrix storage by setting the parameters
2967    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2968    performance can be increased by more than a factor of 50.
2969 
2970    Collective on Mat
2971 
2972    Input Parameters:
2973 +  B - the matrix
2974 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2975           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2976 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2977            submatrix  (same for all local rows)
2978 .  d_nnz - array containing the number of block nonzeros in the various block rows
2979            in the diagonal portion of the local submatrix (possibly different for each block
2980            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
2981            set it even if it is zero.
2982 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2983            submatrix (same for all local rows).
2984 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2985            off-diagonal portion of the local submatrix (possibly different for
2986            each block row) or NULL.
2987 
2988    If the *_nnz parameter is given then the *_nz parameter is ignored
2989 
2990    Options Database Keys:
2991 +   -mat_block_size - size of the blocks to use
2992 -   -mat_use_hash_table <fact> - set hash table factor
2993 
2994    Notes:
2995    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2996    then it must be used on all processors that share the object for that argument.
2997 
2998    Storage Information:
2999    For a square global matrix we define each processor's diagonal portion
3000    to be its local rows and the corresponding columns (a square submatrix);
3001    each processor's off-diagonal portion encompasses the remainder of the
3002    local matrix (a rectangular submatrix).
3003 
3004    The user can specify preallocated storage for the diagonal part of
3005    the local submatrix with either d_nz or d_nnz (not both).  Set
3006    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3007    memory allocation.  Likewise, specify preallocated storage for the
3008    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3009 
3010    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3011    the figure below we depict these three local rows and all columns (0-11).
3012 
3013 .vb
3014            0 1 2 3 4 5 6 7 8 9 10 11
3015           --------------------------
3016    row 3  |o o o d d d o o o o  o  o
3017    row 4  |o o o d d d o o o o  o  o
3018    row 5  |o o o d d d o o o o  o  o
3019           --------------------------
3020 .ve
3021 
3022    Thus, any entries in the d locations are stored in the d (diagonal)
3023    submatrix, and any entries in the o locations are stored in the
3024    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3025    stored simply in the MATSEQBAIJ format for compressed row storage.
3026 
3027    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3028    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3029    In general, for PDE problems in which most nonzeros are near the diagonal,
3030    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3031    or you will get TERRIBLE performance; see the users' manual chapter on
3032    matrices.
3033 
3034    You can call MatGetInfo() to get information on how effective the preallocation was;
3035    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3036    You can also run with the option -info and look for messages with the string
3037    malloc in them to see if additional memory allocation was needed.
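
   As a worked sketch of the figure above (assuming bs = 1, so blocks coincide with entries,
   and that every depicted location is nonzero): rows 3-5 each have 3 entries in the d
   submatrix and 9 in the o submatrix, so one could set
.vb
   PetscInt d_nnz[3] = {3,3,3};
   PetscInt o_nnz[3] = {9,9,9};
   MatMPIBAIJSetPreallocation(B,1,0,d_nnz,0,o_nnz);
.ve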
3038 
3039    Level: intermediate
3040 
3041 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3042 @*/
3043 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3044 {
3045   PetscFunctionBegin;
3046   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3047   PetscValidType(B,1);
3048   PetscValidLogicalCollectiveInt(B,bs,2);
3049   PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
3050   PetscFunctionReturn(0);
3051 }
3052 
3053 /*@C
3054    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3055    (block compressed row).  For good matrix assembly performance
3056    the user should preallocate the matrix storage by setting the parameters
3057    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3058    performance can be increased by more than a factor of 50.
3059 
3060    Collective
3061 
3062    Input Parameters:
3063 +  comm - MPI communicator
3064 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3065           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3066 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3067            This value should be the same as the local size used in creating the
3068            y vector for the matrix-vector product y = Ax.
3069 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3070            This value should be the same as the local size used in creating the
3071            x vector for the matrix-vector product y = Ax.
3072 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3073 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3074 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3075            submatrix  (same for all local rows)
3076 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3077            in the diagonal portion of the local submatrix (possibly different for each block
3078            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3079            and set it even if it is zero.
3080 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3081            submatrix (same for all local rows).
3082 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3083            off-diagonal portion of the local submatrix (possibly different for
3084            each block row) or NULL.
3085 
3086    Output Parameter:
3087 .  A - the matrix
3088 
3089    Options Database Keys:
3090 +   -mat_block_size - size of the blocks to use
3091 -   -mat_use_hash_table <fact> - set hash table factor
3092 
3093    Notes:
3094    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3095    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3096    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3097 
3098    If the *_nnz parameter is given then the *_nz parameter is ignored
3099 
3100    A nonzero block is any block that has 1 or more nonzeros in it
3101 
3102    The user MUST specify either the local or global matrix dimensions
3103    (possibly both).
3104 
3105    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3106    then it must be used on all processors that share the object for that argument.
3107 
3108    Storage Information:
3109    For a square global matrix we define each processor's diagonal portion
3110    to be its local rows and the corresponding columns (a square submatrix);
3111    each processor's off-diagonal portion encompasses the remainder of the
3112    local matrix (a rectangular submatrix).
3113 
3114    The user can specify preallocated storage for the diagonal part of
3115    the local submatrix with either d_nz or d_nnz (not both).  Set
3116    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3117    memory allocation.  Likewise, specify preallocated storage for the
3118    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3119 
3120    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3121    the figure below we depict these three local rows and all columns (0-11).
3122 
3123 .vb
3124            0 1 2 3 4 5 6 7 8 9 10 11
3125           --------------------------
3126    row 3  |o o o d d d o o o o  o  o
3127    row 4  |o o o d d d o o o o  o  o
3128    row 5  |o o o d d d o o o o  o  o
3129           --------------------------
3130 .ve
3131 
3132    Thus, any entries in the d locations are stored in the d (diagonal)
3133    submatrix, and any entries in the o locations are stored in the
3134    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3135    stored simply in the MATSEQBAIJ format for compressed row storage.
3136 
3137    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3138    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3139    In general, for PDE problems in which most nonzeros are near the diagonal,
3140    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3141    or you will get TERRIBLE performance; see the users' manual chapter on
3142    matrices.
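
   Example (a minimal sketch; M, N and the preallocation counts are placeholders):
.vb
   Mat A;
   MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,NULL,2,NULL,&A);
   /* ... MatSetValuesBlocked(), MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve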
3143 
3144    Level: intermediate
3145 
3146 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3147 @*/
3148 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3149 {
3150   PetscMPIInt    size;
3151 
3152   PetscFunctionBegin;
3153   PetscCall(MatCreate(comm,A));
3154   PetscCall(MatSetSizes(*A,m,n,M,N));
3155   PetscCallMPI(MPI_Comm_size(comm,&size));
3156   if (size > 1) {
3157     PetscCall(MatSetType(*A,MATMPIBAIJ));
3158     PetscCall(MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz));
3159   } else {
3160     PetscCall(MatSetType(*A,MATSEQBAIJ));
3161     PetscCall(MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz));
3162   }
3163   PetscFunctionReturn(0);
3164 }
3165 
3166 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3167 {
3168   Mat            mat;
3169   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3170   PetscInt       len=0;
3171 
3172   PetscFunctionBegin;
3173   *newmat = NULL;
3174   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
3175   PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
3176   PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
3177 
3178   mat->factortype   = matin->factortype;
3179   mat->preallocated = PETSC_TRUE;
3180   mat->assembled    = PETSC_TRUE;
3181   mat->insertmode   = NOT_SET_VALUES;
3182 
3183   a             = (Mat_MPIBAIJ*)mat->data;
3184   mat->rmap->bs = matin->rmap->bs;
3185   a->bs2        = oldmat->bs2;
3186   a->mbs        = oldmat->mbs;
3187   a->nbs        = oldmat->nbs;
3188   a->Mbs        = oldmat->Mbs;
3189   a->Nbs        = oldmat->Nbs;
3190 
3191   PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
3192   PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));
3193 
3194   a->size         = oldmat->size;
3195   a->rank         = oldmat->rank;
3196   a->donotstash   = oldmat->donotstash;
3197   a->roworiented  = oldmat->roworiented;
3198   a->rowindices   = NULL;
3199   a->rowvalues    = NULL;
3200   a->getrowactive = PETSC_FALSE;
3201   a->barray       = NULL;
3202   a->rstartbs     = oldmat->rstartbs;
3203   a->rendbs       = oldmat->rendbs;
3204   a->cstartbs     = oldmat->cstartbs;
3205   a->cendbs       = oldmat->cendbs;
3206 
3207   /* hash table stuff */
3208   a->ht           = NULL;
3209   a->hd           = NULL;
3210   a->ht_size      = 0;
3211   a->ht_flag      = oldmat->ht_flag;
3212   a->ht_fact      = oldmat->ht_fact;
3213   a->ht_total_ct  = 0;
3214   a->ht_insert_ct = 0;
3215 
3216   PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1));
3217   if (oldmat->colmap) {
3218 #if defined(PETSC_USE_CTABLE)
3219     PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
3220 #else
3221     PetscCall(PetscMalloc1(a->Nbs,&a->colmap));
3222     PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt)));
3223     PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs));
3224 #endif
3225   } else a->colmap = NULL;
3226 
3227   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3228     PetscCall(PetscMalloc1(len,&a->garray));
3229     PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
3230     PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
3231   } else a->garray = NULL;
3232 
3233   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash));
3234   PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
3235   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
3236   PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
3237   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
3238 
3239   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
3240   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
3241   PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
3242   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
3243   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
3244   *newmat = mat;
3245   PetscFunctionReturn(0);
3246 }
3247 
3248 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3249 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
3250 {
3251   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3252   PetscInt       *rowidxs,*colidxs,rs,cs,ce;
3253   PetscScalar    *matvals;
3254 
3255   PetscFunctionBegin;
3256   PetscCall(PetscViewerSetUp(viewer));
3257 
3258   /* read in matrix header */
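  /* layout as read below (a sketch of the binary matrix format):
       header[0] = MAT_FILE_CLASSID, header[1] = M, header[2] = N, header[3] = nz,
     followed by the row lengths, the column indices, and the scalar values */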
3259   PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
3260   PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3261   M  = header[1]; N = header[2]; nz = header[3];
3262   PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
3263   PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
3264   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
3265 
3266   /* set block sizes from the viewer's .info file */
3267   PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
3268   /* set local sizes if not set already */
3269   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3270   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3271   /* set global sizes if not set already */
3272   if (mat->rmap->N < 0) mat->rmap->N = M;
3273   if (mat->cmap->N < 0) mat->cmap->N = N;
3274   PetscCall(PetscLayoutSetUp(mat->rmap));
3275   PetscCall(PetscLayoutSetUp(mat->cmap));
3276 
3277   /* check if the matrix sizes are correct */
3278   PetscCall(MatGetSize(mat,&rows,&cols));
3279   PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3280   PetscCall(MatGetBlockSize(mat,&bs));
3281   PetscCall(MatGetLocalSize(mat,&m,&n));
3282   PetscCall(PetscLayoutGetRange(mat->rmap,&rs,NULL));
3283   PetscCall(PetscLayoutGetRange(mat->cmap,&cs,&ce));
3284   mbs = m/bs; nbs = n/bs;
3285 
3286   /* read in row lengths and build row indices */
3287   PetscCall(PetscMalloc1(m+1,&rowidxs));
3288   PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
3289   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3290   PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
3291   PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3292 
3293   /* read in column indices and matrix values */
3294   PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
3295   PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
3296   PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
3297 
3298   { /* preallocate matrix storage */
3299     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3300     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3301     PetscBool  sbaij,done;
3302     PetscInt   *d_nnz,*o_nnz;
3303 
3304     PetscCall(PetscBTCreate(nbs,&bt));
3305     PetscCall(PetscHSetICreate(&ht));
3306     PetscCall(PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz));
3307     PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij));
3308     for (i=0; i<mbs; i++) {
3309       PetscCall(PetscBTMemzero(nbs,bt));
3310       PetscCall(PetscHSetIClear(ht));
3311       for (k=0; k<bs; k++) {
3312         PetscInt row = bs*i + k;
3313         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3314           PetscInt col = colidxs[j];
3315           if (!sbaij || col >= row) {
3316             if (col >= cs && col < ce) {
3317               if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
3318             } else {
3319               PetscCall(PetscHSetIQueryAdd(ht,col/bs,&done));
3320               if (done) o_nnz[i]++;
3321             }
3322           }
3323         }
3324       }
3325     }
3326     PetscCall(PetscBTDestroy(&bt));
3327     PetscCall(PetscHSetIDestroy(&ht));
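    /* both preallocation routines are called since this loader serves MPIBAIJ and
       MPISBAIJ; each uses PetscTryMethod() internally, so only the one matching
       the matrix type takes effect */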
3328     PetscCall(MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz));
3329     PetscCall(MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz));
3330     PetscCall(PetscFree2(d_nnz,o_nnz));
3331   }
3332 
3333   /* store matrix values */
3334   for (i=0; i<m; i++) {
3335     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
3336     PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES));
3337   }
3338 
3339   PetscCall(PetscFree(rowidxs));
3340   PetscCall(PetscFree2(colidxs,matvals));
3341   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
3342   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
3343   PetscFunctionReturn(0);
3344 }
3345 
3346 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3347 {
3348   PetscBool isbinary;
3349 
3350   PetscFunctionBegin;
3351   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
3352   PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3353   PetscCall(MatLoad_MPIBAIJ_Binary(mat,viewer));
3354   PetscFunctionReturn(0);
3355 }
3356 
3357 /*@
3358    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3359 
3360    Not Collective, each process can use a different factor
3361 
3362    Input Parameters:
3363 +  mat  - the matrix
3364 -  fact - factor
3365 
3366    Level: advanced
3367 
3368   Notes:
3369    This can also be set by the command line option: -mat_use_hash_table <fact>
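
   Example (a minimal sketch; 1.39 is the default factor used elsewhere in this file):
.vb
   MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(mat,1.39);
.ve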
3370 
3371 .seealso: `MatSetOption()`
3372 @*/
3373 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3374 {
3375   PetscFunctionBegin;
3376   PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));
3377   PetscFunctionReturn(0);
3378 }
3379 
3380 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3381 {
3382   Mat_MPIBAIJ *baij;
3383 
3384   PetscFunctionBegin;
3385   baij          = (Mat_MPIBAIJ*)mat->data;
3386   baij->ht_fact = fact;
3387   PetscFunctionReturn(0);
3388 }
3389 
3390 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3391 {
3392   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3393   PetscBool    flg;
3394 
3395   PetscFunctionBegin;
3396   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg));
3397   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3398   if (Ad)     *Ad     = a->A;
3399   if (Ao)     *Ao     = a->B;
3400   if (colmap) *colmap = a->garray;
3401   PetscFunctionReturn(0);
3402 }
3403 
3404 /*
3405     Special version for direct calls from Fortran (to eliminate two function call overheads)
3406 */
3407 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3408 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3409 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3410 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3411 #endif
3412 
3413 /*@C
3414   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3415 
3416   Collective on Mat
3417 
3418   Input Parameters:
3419 + mat - the matrix
3420 . min - number of input rows
3421 . im - input rows
3422 . nin - number of input columns
3423 . in - input columns
3424 . v - numerical values input
3425 - addvin - INSERT_VALUES or ADD_VALUES
3426 
3427   Notes:
3428     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3429 
3430   Level: advanced
3431 
3432 .seealso: `MatSetValuesBlocked()`
3433 @*/
3434 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3435 {
3436   /* convert input arguments to C version */
3437   Mat        mat  = *matin;
3438   PetscInt   m    = *min, n = *nin;
3439   InsertMode addv = *addvin;
3440 
3441   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3442   const MatScalar *value;
3443   MatScalar       *barray     = baij->barray;
3444   PetscBool       roworiented = baij->roworiented;
3445   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3446   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3447   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3448 
3449   PetscFunctionBegin;
3450   /* tasks normally handled by MatSetValuesBlocked() */
3451   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3452   else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3453   PetscCheck(!mat->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3454   if (mat->assembled) {
3455     mat->was_assembled = PETSC_TRUE;
3456     mat->assembled     = PETSC_FALSE;
3457   }
3458   PetscCall(PetscLogEventBegin(MAT_SetValues,mat,0,0,0));
3459 
3460   if (!barray) {
3461     PetscCall(PetscMalloc1(bs2,&barray));
3462     baij->barray = barray;
3463   }
3464 
3465   if (roworiented) stepval = (n-1)*bs;
3466   else stepval = (m-1)*bs;
3467 
3468   for (i=0; i<m; i++) {
3469     if (im[i] < 0) continue;
3470     PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
3471     if (im[i] >= rstart && im[i] < rend) {
3472       row = im[i] - rstart;
3473       for (j=0; j<n; j++) {
3474         /* If NumCol = 1 then a copy is not required */
3475         if ((roworiented) && (n == 1)) {
3476           barray = (MatScalar*)v + i*bs2;
3477         } else if ((!roworiented) && (m == 1)) {
3478           barray = (MatScalar*)v + j*bs2;
3479         } else { /* Here a copy is required */
3480           if (roworiented) {
3481             value = v + i*(stepval+bs)*bs + j*bs;
3482           } else {
3483             value = v + j*(stepval+bs)*bs + i*bs;
3484           }
3485           for (ii=0; ii<bs; ii++,value+=stepval) {
3486             for (jj=0; jj<bs; jj++) {
3487               *barray++ = *value++;
3488             }
3489           }
3490           barray -=bs2;
3491         }
3492 
3493         if (in[j] >= cstart && in[j] < cend) {
3494           col  = in[j] - cstart;
3495           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
3496         } else if (in[j] < 0) continue;
3497         else {
3498           PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
3499           if (mat->was_assembled) {
3500             if (!baij->colmap) {
3501               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3502             }
3503 
3504 #if defined(PETSC_USE_DEBUG)
3505 #if defined(PETSC_USE_CTABLE)
3506             { PetscInt data;
3507               PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
3508               PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3509             }
3510 #else
3511             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3512 #endif
3513 #endif
3514 #if defined(PETSC_USE_CTABLE)
3515             PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
3516             col  = (col - 1)/bs;
3517 #else
3518             col = (baij->colmap[in[j]] - 1)/bs;
3519 #endif
3520             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3521               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3522               col  =  in[j];
3523             }
3524           } else col = in[j];
3525           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
3526         }
3527       }
3528     } else {
3529       if (!baij->donotstash) {
3530         if (roworiented) {
3531           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
3532         } else {
3533           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
3534         }
3535       }
3536     }
3537   }
3538 
3539   /* task normally handled by MatSetValuesBlocked() */
3540   PetscCall(PetscLogEventEnd(MAT_SetValues,mat,0,0,0));
3541   PetscFunctionReturn(0);
3542 }
3543 
3544 /*@
3545      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
3546          in standard block CSR format.
3547 
3548    Collective
3549 
3550    Input Parameters:
3551 +  comm - MPI communicator
3552 .  bs - the block size, only a block size of 1 is supported
3553 .  m - number of local rows (Cannot be PETSC_DECIDE)
3554 .  n - This value should be the same as the local size used in creating the
3555        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3556        calculated if N is given) For square matrices n is almost always m.
3557 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3558 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3559 .   i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of block elements in block row row-1 of the matrix
3560 .   j - column indices
3561 -   a - matrix values
3562 
3563    Output Parameter:
3564 .   mat - the matrix
3565 
3566    Level: intermediate
3567 
3568    Notes:
3569        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3570      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3571      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3572 
3573      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3574      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3575      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3576      with column-major ordering within blocks.
3577 
3578        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
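
     Example (an illustrative sketch with made-up values; bs = 1 since, as noted above, only a
     block size of 1 is supported):
.vb
   PetscInt    i[3] = {0,2,3};        /* two local rows with 2 and 1 entries */
   PetscInt    j[3] = {0,1,1};        /* global column indices               */
   PetscScalar a[3] = {1.0,2.0,3.0};
   MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,PETSC_DECIDE,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);
.ve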
3579 
3580 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3581           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3582 @*/
3583 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3584 {
3585   PetscFunctionBegin;
3586   PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3587   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3588   PetscCall(MatCreate(comm,mat));
3589   PetscCall(MatSetSizes(*mat,m,n,M,N));
3590   PetscCall(MatSetType(*mat,MATMPIBAIJ));
3591   PetscCall(MatSetBlockSize(*mat,bs));
3592   PetscCall(MatSetUp(*mat));
3593   PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE));
3594   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a));
3595   PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE));
3596   PetscFunctionReturn(0);
3597 }
3598 
3599 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3600 {
3601   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3602   PetscInt       *indx;
3603   PetscScalar    *values;
3604 
3605   PetscFunctionBegin;
3606   PetscCall(MatGetSize(inmat,&m,&N));
3607   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3608     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3609     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3610     PetscInt       *bindx,rmax=a->rmax,j;
3611     PetscMPIInt    rank,size;
3612 
3613     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
3614     mbs = m/bs; Nbs = N/cbs;
3615     if (n == PETSC_DECIDE) {
3616       PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N));
3617     }
3618     nbs = n/cbs;
3619 
3620     PetscCall(PetscMalloc1(rmax,&bindx));
3621     MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */
3622 
3623     PetscCallMPI(MPI_Comm_rank(comm,&rank));
3624     PetscCallMPI(MPI_Comm_size(comm,&size));
3625     if (rank == size-1) {
3626       /* Check sum(nbs) = Nbs */
3627       PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
3628     }
3629 
3630     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3631     for (i=0; i<mbs; i++) {
3632       PetscCall(MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */
3633       nnz = nnz/bs;
3634       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3635       PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz));
3636       PetscCall(MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL));
3637     }
3638     PetscCall(PetscFree(bindx));
3639 
3640     PetscCall(MatCreate(comm,outmat));
3641     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
3642     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
3643     PetscCall(MatSetType(*outmat,MATBAIJ));
3644     PetscCall(MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz));
3645     PetscCall(MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz));
3646     MatPreallocateEnd(dnz,onz);
3647     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
3648   }
3649 
3650   /* numeric phase */
3651   PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
3652   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
3653 
3654   for (i=0; i<m; i++) {
3655     PetscCall(MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values));
3656     Ii   = i + rstart;
3657     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
3658     PetscCall(MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values));
3659   }
3660   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
3661   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
3662   PetscFunctionReturn(0);
3663 }
3664