xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision b51a437600744b27e674134f831f57ac6dc85c95)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode ierr;
15   PetscInt       i,*idxb = 0;
16   PetscScalar    *va,*vb;
17   Vec            vtmp;
18 
19   PetscFunctionBegin;
20   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
21   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
22   if (idx) {
23     for (i=0; i<A->rmap->n; i++) {
24       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
25     }
26   }
27 
28   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
29   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
30   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
31   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
32 
33   for (i=0; i<A->rmap->n; i++) {
34     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
35       va[i] = vb[i];
36       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
37     }
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
48 {
49   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
59 {
60   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
65   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 /*
70      Local utility routine that creates a mapping from the global column
71    number to the local number in the off-diagonal part of the local
72    storage of the matrix.  This is done in a non scalable way since the
73    length of colmap equals the global matrix length.
74 */
75 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
76 {
77   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
78   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
79   PetscErrorCode ierr;
80   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
81 
82   PetscFunctionBegin;
83 #if defined(PETSC_USE_CTABLE)
84   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
85   for (i=0; i<nbs; i++) {
86     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
87   }
88 #else
89   ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
90   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
91   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
92 #endif
93   PetscFunctionReturn(0);
94 }
95 
96 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
97   { \
98     brow = row/bs;  \
99     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
100     rmax = aimax[brow]; nrow = ailen[brow]; \
101     bcol = col/bs; \
102     ridx = row % bs; cidx = col % bs; \
103     low  = 0; high = nrow; \
104     while (high-low > 3) { \
105       t = (low+high)/2; \
106       if (rp[t] > bcol) high = t; \
107       else              low  = t; \
108     } \
109     for (_i=low; _i<high; _i++) { \
110       if (rp[_i] > bcol) break; \
111       if (rp[_i] == bcol) { \
112         bap = ap +  bs2*_i + bs*cidx + ridx; \
113         if (addv == ADD_VALUES) *bap += value;  \
114         else                    *bap  = value;  \
115         goto a_noinsert; \
116       } \
117     } \
118     if (a->nonew == 1) goto a_noinsert; \
119     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
120     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
121     N = nrow++ - 1;  \
122     /* shift up all the later entries in this row */ \
123     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
124     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
125     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
126     rp[_i]                      = bcol;  \
127     ap[bs2*_i + bs*cidx + ridx] = value;  \
128 a_noinsert:; \
129     ailen[brow] = nrow; \
130   }
131 
132 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
133   { \
134     brow = row/bs;  \
135     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
136     rmax = bimax[brow]; nrow = bilen[brow]; \
137     bcol = col/bs; \
138     ridx = row % bs; cidx = col % bs; \
139     low  = 0; high = nrow; \
140     while (high-low > 3) { \
141       t = (low+high)/2; \
142       if (rp[t] > bcol) high = t; \
143       else              low  = t; \
144     } \
145     for (_i=low; _i<high; _i++) { \
146       if (rp[_i] > bcol) break; \
147       if (rp[_i] == bcol) { \
148         bap = ap +  bs2*_i + bs*cidx + ridx; \
149         if (addv == ADD_VALUES) *bap += value;  \
150         else                    *bap  = value;  \
151         goto b_noinsert; \
152       } \
153     } \
154     if (b->nonew == 1) goto b_noinsert; \
155     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
156     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
157     N = nrow++ - 1;  \
158     /* shift up all the later entries in this row */ \
159     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
160     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
161     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
162     rp[_i]                      = bcol;  \
163     ap[bs2*_i + bs*cidx + ridx] = value;  \
164 b_noinsert:; \
165     bilen[brow] = nrow; \
166   }
167 
168 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
169 {
170   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
171   MatScalar      value;
172   PetscBool      roworiented = baij->roworiented;
173   PetscErrorCode ierr;
174   PetscInt       i,j,row,col;
175   PetscInt       rstart_orig=mat->rmap->rstart;
176   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
177   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
178 
179   /* Some Variables required in the macro */
180   Mat         A     = baij->A;
181   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
182   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
183   MatScalar   *aa   =a->a;
184 
185   Mat         B     = baij->B;
186   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
187   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
188   MatScalar   *ba   =b->a;
189 
190   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
191   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
192   MatScalar *ap,*bap;
193 
194   PetscFunctionBegin;
195   for (i=0; i<m; i++) {
196     if (im[i] < 0) continue;
197 #if defined(PETSC_USE_DEBUG)
198     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
199 #endif
200     if (im[i] >= rstart_orig && im[i] < rend_orig) {
201       row = im[i] - rstart_orig;
202       for (j=0; j<n; j++) {
203         if (in[j] >= cstart_orig && in[j] < cend_orig) {
204           col = in[j] - cstart_orig;
205           if (roworiented) value = v[i*n+j];
206           else             value = v[i+j*m];
207           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
208         } else if (in[j] < 0) continue;
209 #if defined(PETSC_USE_DEBUG)
210         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
211 #endif
212         else {
213           if (mat->was_assembled) {
214             if (!baij->colmap) {
215               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
216             }
217 #if defined(PETSC_USE_CTABLE)
218             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
219             col  = col - 1;
220 #else
221             col = baij->colmap[in[j]/bs] - 1;
222 #endif
223             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
224               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
225               col  =  in[j];
226               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
227               B    = baij->B;
228               b    = (Mat_SeqBAIJ*)(B)->data;
229               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
230               ba   =b->a;
231             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
232             else col += in[j]%bs;
233           } else col = in[j];
234           if (roworiented) value = v[i*n+j];
235           else             value = v[i+j*m];
236           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
237           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
238         }
239       }
240     } else {
241       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
242       if (!baij->donotstash) {
243         mat->assembled = PETSC_FALSE;
244         if (roworiented) {
245           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
246         } else {
247           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
248         }
249       }
250     }
251   }
252   PetscFunctionReturn(0);
253 }
254 
255 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
256 {
257   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
258   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
259   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
260   PetscErrorCode    ierr;
261   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
262   PetscBool         roworiented=a->roworiented;
263   const PetscScalar *value     = v;
264   MatScalar         *ap,*aa = a->a,*bap;
265 
266   PetscFunctionBegin;
267   rp   = aj + ai[row];
268   ap   = aa + bs2*ai[row];
269   rmax = imax[row];
270   nrow = ailen[row];
271   value = v;
272   low = 0;
273   high = nrow;
274   while (high-low > 7) {
275     t = (low+high)/2;
276     if (rp[t] > col) high = t;
277     else             low  = t;
278   }
279   for (i=low; i<high; i++) {
280     if (rp[i] > col) break;
281     if (rp[i] == col) {
282       bap = ap +  bs2*i;
283       if (roworiented) {
284         if (is == ADD_VALUES) {
285           for (ii=0; ii<bs; ii++) {
286             for (jj=ii; jj<bs2; jj+=bs) {
287               bap[jj] += *value++;
288             }
289           }
290         } else {
291           for (ii=0; ii<bs; ii++) {
292             for (jj=ii; jj<bs2; jj+=bs) {
293               bap[jj] = *value++;
294             }
295           }
296         }
297       } else {
298         if (is == ADD_VALUES) {
299           for (ii=0; ii<bs; ii++,value+=bs) {
300             for (jj=0; jj<bs; jj++) {
301               bap[jj] += value[jj];
302             }
303             bap += bs;
304           }
305         } else {
306           for (ii=0; ii<bs; ii++,value+=bs) {
307             for (jj=0; jj<bs; jj++) {
308               bap[jj]  = value[jj];
309             }
310             bap += bs;
311           }
312         }
313       }
314       goto noinsert2;
315     }
316   }
317   if (nonew == 1) goto noinsert2;
318   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
319   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
320   N = nrow++ - 1; high++;
321   /* shift up all the later entries in this row */
322   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
323   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
324   rp[i] = col;
325   bap   = ap +  bs2*i;
326   if (roworiented) {
327     for (ii=0; ii<bs; ii++) {
328       for (jj=ii; jj<bs2; jj+=bs) {
329         bap[jj] = *value++;
330       }
331     }
332   } else {
333     for (ii=0; ii<bs; ii++) {
334       for (jj=0; jj<bs; jj++) {
335         *bap++ = *value++;
336       }
337     }
338   }
339   noinsert2:;
340   ailen[row] = nrow;
341   PetscFunctionReturn(0);
342 }
343 
344 /*
345     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
346     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
347 */
348 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
349 {
350   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
351   const PetscScalar *value;
352   MatScalar         *barray     = baij->barray;
353   PetscBool         roworiented = baij->roworiented;
354   PetscErrorCode    ierr;
355   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
356   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
357   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
358 
359   PetscFunctionBegin;
360   if (!barray) {
361     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
362     baij->barray = barray;
363   }
364 
365   if (roworiented) stepval = (n-1)*bs;
366   else stepval = (m-1)*bs;
367 
368   for (i=0; i<m; i++) {
369     if (im[i] < 0) continue;
370 #if defined(PETSC_USE_DEBUG)
371     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
372 #endif
373     if (im[i] >= rstart && im[i] < rend) {
374       row = im[i] - rstart;
375       for (j=0; j<n; j++) {
376         /* If NumCol = 1 then a copy is not required */
377         if ((roworiented) && (n == 1)) {
378           barray = (MatScalar*)v + i*bs2;
379         } else if ((!roworiented) && (m == 1)) {
380           barray = (MatScalar*)v + j*bs2;
381         } else { /* Here a copy is required */
382           if (roworiented) {
383             value = v + (i*(stepval+bs) + j)*bs;
384           } else {
385             value = v + (j*(stepval+bs) + i)*bs;
386           }
387           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
388             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
389             barray += bs;
390           }
391           barray -= bs2;
392         }
393 
394         if (in[j] >= cstart && in[j] < cend) {
395           col  = in[j] - cstart;
396           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
397         } else if (in[j] < 0) continue;
398 #if defined(PETSC_USE_DEBUG)
399         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
400 #endif
401         else {
402           if (mat->was_assembled) {
403             if (!baij->colmap) {
404               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
405             }
406 
407 #if defined(PETSC_USE_DEBUG)
408 #if defined(PETSC_USE_CTABLE)
409             { PetscInt data;
410               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
411               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
412             }
413 #else
414             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
415 #endif
416 #endif
417 #if defined(PETSC_USE_CTABLE)
418             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
419             col  = (col - 1)/bs;
420 #else
421             col = (baij->colmap[in[j]] - 1)/bs;
422 #endif
423             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
424               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
425               col  =  in[j];
426             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
427           } else col = in[j];
428           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
429         }
430       }
431     } else {
432       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
433       if (!baij->donotstash) {
434         if (roworiented) {
435           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
436         } else {
437           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
438         }
439       }
440     }
441   }
442   PetscFunctionReturn(0);
443 }
444 
445 #define HASH_KEY 0.6180339887
446 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
447 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
448 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
449 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
450 {
451   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
452   PetscBool      roworiented = baij->roworiented;
453   PetscErrorCode ierr;
454   PetscInt       i,j,row,col;
455   PetscInt       rstart_orig=mat->rmap->rstart;
456   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
457   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
458   PetscReal      tmp;
459   MatScalar      **HD = baij->hd,value;
460 #if defined(PETSC_USE_DEBUG)
461   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
462 #endif
463 
464   PetscFunctionBegin;
465   for (i=0; i<m; i++) {
466 #if defined(PETSC_USE_DEBUG)
467     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
468     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
469 #endif
470     row = im[i];
471     if (row >= rstart_orig && row < rend_orig) {
472       for (j=0; j<n; j++) {
473         col = in[j];
474         if (roworiented) value = v[i*n+j];
475         else             value = v[i+j*m];
476         /* Look up PetscInto the Hash Table */
477         key = (row/bs)*Nbs+(col/bs)+1;
478         h1  = HASH(size,key,tmp);
479 
480 
481         idx = h1;
482 #if defined(PETSC_USE_DEBUG)
483         insert_ct++;
484         total_ct++;
485         if (HT[idx] != key) {
486           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
487           if (idx == size) {
488             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
489             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
490           }
491         }
492 #else
493         if (HT[idx] != key) {
494           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
495           if (idx == size) {
496             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
497             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
498           }
499         }
500 #endif
501         /* A HASH table entry is found, so insert the values at the correct address */
502         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
503         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
504       }
505     } else if (!baij->donotstash) {
506       if (roworiented) {
507         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
508       } else {
509         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
510       }
511     }
512   }
513 #if defined(PETSC_USE_DEBUG)
514   baij->ht_total_ct  += total_ct;
515   baij->ht_insert_ct += insert_ct;
516 #endif
517   PetscFunctionReturn(0);
518 }
519 
520 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
521 {
522   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
523   PetscBool         roworiented = baij->roworiented;
524   PetscErrorCode    ierr;
525   PetscInt          i,j,ii,jj,row,col;
526   PetscInt          rstart=baij->rstartbs;
527   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
528   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
529   PetscReal         tmp;
530   MatScalar         **HD = baij->hd,*baij_a;
531   const PetscScalar *v_t,*value;
532 #if defined(PETSC_USE_DEBUG)
533   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
534 #endif
535 
536   PetscFunctionBegin;
537   if (roworiented) stepval = (n-1)*bs;
538   else stepval = (m-1)*bs;
539 
540   for (i=0; i<m; i++) {
541 #if defined(PETSC_USE_DEBUG)
542     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
543     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
544 #endif
545     row = im[i];
546     v_t = v + i*nbs2;
547     if (row >= rstart && row < rend) {
548       for (j=0; j<n; j++) {
549         col = in[j];
550 
551         /* Look up into the Hash Table */
552         key = row*Nbs+col+1;
553         h1  = HASH(size,key,tmp);
554 
555         idx = h1;
556 #if defined(PETSC_USE_DEBUG)
557         total_ct++;
558         insert_ct++;
559         if (HT[idx] != key) {
560           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
561           if (idx == size) {
562             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
563             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
564           }
565         }
566 #else
567         if (HT[idx] != key) {
568           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
569           if (idx == size) {
570             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
571             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
572           }
573         }
574 #endif
575         baij_a = HD[idx];
576         if (roworiented) {
577           /*value = v + i*(stepval+bs)*bs + j*bs;*/
578           /* value = v + (i*(stepval+bs)+j)*bs; */
579           value = v_t;
580           v_t  += bs;
581           if (addv == ADD_VALUES) {
582             for (ii=0; ii<bs; ii++,value+=stepval) {
583               for (jj=ii; jj<bs2; jj+=bs) {
584                 baij_a[jj] += *value++;
585               }
586             }
587           } else {
588             for (ii=0; ii<bs; ii++,value+=stepval) {
589               for (jj=ii; jj<bs2; jj+=bs) {
590                 baij_a[jj] = *value++;
591               }
592             }
593           }
594         } else {
595           value = v + j*(stepval+bs)*bs + i*bs;
596           if (addv == ADD_VALUES) {
597             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
598               for (jj=0; jj<bs; jj++) {
599                 baij_a[jj] += *value++;
600               }
601             }
602           } else {
603             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
604               for (jj=0; jj<bs; jj++) {
605                 baij_a[jj] = *value++;
606               }
607             }
608           }
609         }
610       }
611     } else {
612       if (!baij->donotstash) {
613         if (roworiented) {
614           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
615         } else {
616           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
617         }
618       }
619     }
620   }
621 #if defined(PETSC_USE_DEBUG)
622   baij->ht_total_ct  += total_ct;
623   baij->ht_insert_ct += insert_ct;
624 #endif
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
633   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
640       row = idxm[i] - bsrstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= bscstart && idxn[j] < bscend) {
645           col  = idxn[j] - bscstart;
646           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!baij->colmap) {
649             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
653           data--;
654 #else
655           data = baij->colmap[idxn[j]/bs]-1;
656 #endif
657           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
658           else {
659             col  = data + idxn[j]%bs;
660             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
661           }
662         }
663       }
664     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
665   }
666   PetscFunctionReturn(0);
667 }
668 
669 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
670 {
671   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
672   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
673   PetscErrorCode ierr;
674   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
675   PetscReal      sum = 0.0;
676   MatScalar      *v;
677 
678   PetscFunctionBegin;
679   if (baij->size == 1) {
680     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
681   } else {
682     if (type == NORM_FROBENIUS) {
683       v  = amat->a;
684       nz = amat->nz*bs2;
685       for (i=0; i<nz; i++) {
686         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
687       }
688       v  = bmat->a;
689       nz = bmat->nz*bs2;
690       for (i=0; i<nz; i++) {
691         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
692       }
693       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
694       *nrm = PetscSqrtReal(*nrm);
695     } else if (type == NORM_1) { /* max column sum */
696       PetscReal *tmp,*tmp2;
697       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
698       ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
699       ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
700       v    = amat->a; jj = amat->j;
701       for (i=0; i<amat->nz; i++) {
702         for (j=0; j<bs; j++) {
703           col = bs*(cstart + *jj) + j; /* column index */
704           for (row=0; row<bs; row++) {
705             tmp[col] += PetscAbsScalar(*v);  v++;
706           }
707         }
708         jj++;
709       }
710       v = bmat->a; jj = bmat->j;
711       for (i=0; i<bmat->nz; i++) {
712         for (j=0; j<bs; j++) {
713           col = bs*garray[*jj] + j;
714           for (row=0; row<bs; row++) {
715             tmp[col] += PetscAbsScalar(*v); v++;
716           }
717         }
718         jj++;
719       }
720       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
721       *nrm = 0.0;
722       for (j=0; j<mat->cmap->N; j++) {
723         if (tmp2[j] > *nrm) *nrm = tmp2[j];
724       }
725       ierr = PetscFree(tmp);CHKERRQ(ierr);
726       ierr = PetscFree(tmp2);CHKERRQ(ierr);
727     } else if (type == NORM_INFINITY) { /* max row sum */
728       PetscReal *sums;
729       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
730       sum  = 0.0;
731       for (j=0; j<amat->mbs; j++) {
732         for (row=0; row<bs; row++) sums[row] = 0.0;
733         v  = amat->a + bs2*amat->i[j];
734         nz = amat->i[j+1]-amat->i[j];
735         for (i=0; i<nz; i++) {
736           for (col=0; col<bs; col++) {
737             for (row=0; row<bs; row++) {
738               sums[row] += PetscAbsScalar(*v); v++;
739             }
740           }
741         }
742         v  = bmat->a + bs2*bmat->i[j];
743         nz = bmat->i[j+1]-bmat->i[j];
744         for (i=0; i<nz; i++) {
745           for (col=0; col<bs; col++) {
746             for (row=0; row<bs; row++) {
747               sums[row] += PetscAbsScalar(*v); v++;
748             }
749           }
750         }
751         for (row=0; row<bs; row++) {
752           if (sums[row] > sum) sum = sums[row];
753         }
754       }
755       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
756       ierr = PetscFree(sums);CHKERRQ(ierr);
757     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
758   }
759   PetscFunctionReturn(0);
760 }
761 
762 /*
763   Creates the hash table, and sets the table
764   This table is created only once.
765   If new entried need to be added to the matrix
766   then the hash table has to be destroyed and
767   recreated.
768 */
769 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
770 {
771   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
772   Mat            A     = baij->A,B=baij->B;
773   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
774   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
775   PetscErrorCode ierr;
776   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
777   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
778   PetscInt       *HT,key;
779   MatScalar      **HD;
780   PetscReal      tmp;
781 #if defined(PETSC_USE_INFO)
782   PetscInt ct=0,max=0;
783 #endif
784 
785   PetscFunctionBegin;
786   if (baij->ht) PetscFunctionReturn(0);
787 
788   baij->ht_size = (PetscInt)(factor*nz);
789   ht_size       = baij->ht_size;
790 
791   /* Allocate Memory for Hash Table */
792   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
793   HD   = baij->hd;
794   HT   = baij->ht;
795 
796   /* Loop Over A */
797   for (i=0; i<a->mbs; i++) {
798     for (j=ai[i]; j<ai[i+1]; j++) {
799       row = i+rstart;
800       col = aj[j]+cstart;
801 
802       key = row*Nbs + col + 1;
803       h1  = HASH(ht_size,key,tmp);
804       for (k=0; k<ht_size; k++) {
805         if (!HT[(h1+k)%ht_size]) {
806           HT[(h1+k)%ht_size] = key;
807           HD[(h1+k)%ht_size] = a->a + j*bs2;
808           break;
809 #if defined(PETSC_USE_INFO)
810         } else {
811           ct++;
812 #endif
813         }
814       }
815 #if defined(PETSC_USE_INFO)
816       if (k> max) max = k;
817 #endif
818     }
819   }
820   /* Loop Over B */
821   for (i=0; i<b->mbs; i++) {
822     for (j=bi[i]; j<bi[i+1]; j++) {
823       row = i+rstart;
824       col = garray[bj[j]];
825       key = row*Nbs + col + 1;
826       h1  = HASH(ht_size,key,tmp);
827       for (k=0; k<ht_size; k++) {
828         if (!HT[(h1+k)%ht_size]) {
829           HT[(h1+k)%ht_size] = key;
830           HD[(h1+k)%ht_size] = b->a + j*bs2;
831           break;
832 #if defined(PETSC_USE_INFO)
833         } else {
834           ct++;
835 #endif
836         }
837       }
838 #if defined(PETSC_USE_INFO)
839       if (k> max) max = k;
840 #endif
841     }
842   }
843 
844   /* Print Summary */
845 #if defined(PETSC_USE_INFO)
846   for (i=0,j=0; i<ht_size; i++) {
847     if (HT[i]) j++;
848   }
849   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
850 #endif
851   PetscFunctionReturn(0);
852 }
853 
854 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
855 {
856   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
857   PetscErrorCode ierr;
858   PetscInt       nstash,reallocs;
859 
860   PetscFunctionBegin;
861   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
862 
863   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
864   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
865   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
866   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
867   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
868   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
869   PetscFunctionReturn(0);
870 }
871 
872 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
873 {
874   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
875   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
876   PetscErrorCode ierr;
877   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
878   PetscInt       *row,*col;
879   PetscBool      r1,r2,r3,other_disassembled;
880   MatScalar      *val;
881   PetscMPIInt    n;
882 
883   PetscFunctionBegin;
884   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
885   if (!baij->donotstash && !mat->nooffprocentries) {
886     while (1) {
887       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
888       if (!flg) break;
889 
890       for (i=0; i<n;) {
891         /* Now identify the consecutive vals belonging to the same row */
892         for (j=i,rstart=row[j]; j<n; j++) {
893           if (row[j] != rstart) break;
894         }
895         if (j < n) ncols = j-i;
896         else       ncols = n-i;
897         /* Now assemble all these values with a single function call */
898         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
899         i    = j;
900       }
901     }
902     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
903     /* Now process the block-stash. Since the values are stashed column-oriented,
904        set the roworiented flag to column oriented, and after MatSetValues()
905        restore the original flags */
906     r1 = baij->roworiented;
907     r2 = a->roworiented;
908     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
909 
910     baij->roworiented = PETSC_FALSE;
911     a->roworiented    = PETSC_FALSE;
912 
913     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
914     while (1) {
915       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
916       if (!flg) break;
917 
918       for (i=0; i<n;) {
919         /* Now identify the consecutive vals belonging to the same row */
920         for (j=i,rstart=row[j]; j<n; j++) {
921           if (row[j] != rstart) break;
922         }
923         if (j < n) ncols = j-i;
924         else       ncols = n-i;
925         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
926         i    = j;
927       }
928     }
929     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
930 
931     baij->roworiented = r1;
932     a->roworiented    = r2;
933 
934     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
935   }
936 
937   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
938   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
939 
940   /* determine if any processor has disassembled, if so we must
941      also disassemble ourselfs, in order that we may reassemble. */
942   /*
943      if nonzero structure of submatrix B cannot change then we know that
944      no processor disassembled thus we can skip this stuff
945   */
946   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
947     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
948     if (mat->was_assembled && !other_disassembled) {
949       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
950     }
951   }
952 
953   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
954     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
955   }
956   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
957   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
958 
959 #if defined(PETSC_USE_INFO)
960   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
961     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
962 
963     baij->ht_total_ct  = 0;
964     baij->ht_insert_ct = 0;
965   }
966 #endif
967   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
968     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
969 
970     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
971     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
972   }
973 
974   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
975 
976   baij->rowvalues = 0;
977 
978   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
979   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
980     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
981     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
982   }
983   PetscFunctionReturn(0);
984 }
985 
986 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
987 #include <petscdraw.h>
988 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
989 {
990   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
991   PetscErrorCode    ierr;
992   PetscMPIInt       rank = baij->rank;
993   PetscInt          bs   = mat->rmap->bs;
994   PetscBool         iascii,isdraw;
995   PetscViewer       sviewer;
996   PetscViewerFormat format;
997 
998   PetscFunctionBegin;
999   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1000   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1001   if (iascii) {
1002     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1003     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1004       MatInfo info;
1005       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1006       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1007       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1008       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
1009                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
1010       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1011       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1012       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1013       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1014       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1015       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1016       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1017       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1018       PetscFunctionReturn(0);
1019     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1020       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1021       PetscFunctionReturn(0);
1022     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1023       PetscFunctionReturn(0);
1024     }
1025   }
1026 
1027   if (isdraw) {
1028     PetscDraw draw;
1029     PetscBool isnull;
1030     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1031     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1032     if (isnull) PetscFunctionReturn(0);
1033   }
1034 
1035   {
1036     /* assemble the entire matrix onto first processor. */
1037     Mat         A;
1038     Mat_SeqBAIJ *Aloc;
1039     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1040     MatScalar   *a;
1041     const char  *matname;
1042 
1043     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1044     /* Perhaps this should be the type of mat? */
1045     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1046     if (!rank) {
1047       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1048     } else {
1049       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1050     }
1051     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1052     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1053     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1054     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1055 
1056     /* copy over the A part */
1057     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1058     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1059     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1060 
1061     for (i=0; i<mbs; i++) {
1062       rvals[0] = bs*(baij->rstartbs + i);
1063       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1064       for (j=ai[i]; j<ai[i+1]; j++) {
1065         col = (baij->cstartbs+aj[j])*bs;
1066         for (k=0; k<bs; k++) {
1067           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1068           col++; a += bs;
1069         }
1070       }
1071     }
1072     /* copy over the B part */
1073     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1074     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1075     for (i=0; i<mbs; i++) {
1076       rvals[0] = bs*(baij->rstartbs + i);
1077       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1078       for (j=ai[i]; j<ai[i+1]; j++) {
1079         col = baij->garray[aj[j]]*bs;
1080         for (k=0; k<bs; k++) {
1081           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1082           col++; a += bs;
1083         }
1084       }
1085     }
1086     ierr = PetscFree(rvals);CHKERRQ(ierr);
1087     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1088     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1089     /*
1090        Everyone has to call to draw the matrix since the graphics waits are
1091        synchronized across all processors that share the PetscDraw object
1092     */
1093     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1094     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1095     if (!rank) {
1096       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1097       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1098     }
1099     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1100     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1101     ierr = MatDestroy(&A);CHKERRQ(ierr);
1102   }
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1107 {
1108   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
1109   Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
1110   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
1111   const PetscInt *garray = aij->garray;
1112   PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
1113   PetscInt       *rowlens,*colidxs;
1114   PetscScalar    *matvals;
1115   PetscErrorCode ierr;
1116 
1117   PetscFunctionBegin;
1118   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1119 
1120   M  = mat->rmap->N;
1121   N  = mat->cmap->N;
1122   m  = mat->rmap->n;
1123   rs = mat->rmap->rstart;
1124   cs = mat->cmap->rstart;
1125   bs = mat->rmap->bs;
1126   nz = bs*bs*(A->nz + B->nz);
1127 
1128   /* write matrix header */
1129   header[0] = MAT_FILE_CLASSID;
1130   header[1] = M; header[2] = N; header[3] = nz;
1131   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1132   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1133 
1134   /* fill in and store row lengths */
1135   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1136   for (cnt=0, i=0; i<A->mbs; i++)
1137     for (j=0; j<bs; j++)
1138       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
1139   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1140   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1141 
1142   /* fill in and store column indices */
1143   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1144   for (cnt=0, i=0; i<A->mbs; i++) {
1145     for (k=0; k<bs; k++) {
1146       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1147         if (garray[B->j[jb]] > cs/bs) break;
1148         for (l=0; l<bs; l++)
1149           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1150       }
1151       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1152         for (l=0; l<bs; l++)
1153           colidxs[cnt++] = bs*A->j[ja] + l + cs;
1154       for (; jb<B->i[i+1]; jb++)
1155         for (l=0; l<bs; l++)
1156           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1157     }
1158   }
1159   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1160   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
1161   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1162 
1163   /* fill in and store nonzero values */
1164   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1165   for (cnt=0, i=0; i<A->mbs; i++) {
1166     for (k=0; k<bs; k++) {
1167       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1168         if (garray[B->j[jb]] > cs/bs) break;
1169         for (l=0; l<bs; l++)
1170           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1171       }
1172       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1173         for (l=0; l<bs; l++)
1174           matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
1175       for (; jb<B->i[i+1]; jb++)
1176         for (l=0; l<bs; l++)
1177           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];;
1178     }
1179   }
1180   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
1181   ierr = PetscFree(matvals);CHKERRQ(ierr);
1182 
1183   /* write block size option to the viewer's .info file */
1184   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1189 {
1190   PetscErrorCode ierr;
1191   PetscBool      iascii,isdraw,issocket,isbinary;
1192 
1193   PetscFunctionBegin;
1194   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1195   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1196   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1197   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1198   if (iascii || isdraw || issocket) {
1199     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1200   } else if (isbinary) {
1201     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1202   }
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1207 {
1208   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1209   PetscErrorCode ierr;
1210 
1211   PetscFunctionBegin;
1212 #if defined(PETSC_USE_LOG)
1213   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1214 #endif
1215   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1216   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1217   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1218   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1219 #if defined(PETSC_USE_CTABLE)
1220   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1221 #else
1222   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1223 #endif
1224   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1225   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1226   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1227   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1228   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1229   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1230   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1231   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1232 
1233   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1236   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1237   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1238   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1240   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1242 #if defined(PETSC_HAVE_HYPRE)
1243   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
1244 #endif
1245   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
1246   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1251 {
1252   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1253   PetscErrorCode ierr;
1254   PetscInt       nt;
1255 
1256   PetscFunctionBegin;
1257   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1258   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1259   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1260   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1261   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1262   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1263   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1269 {
1270   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1275   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1276   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1277   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1282 {
1283   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1284   PetscErrorCode ierr;
1285 
1286   PetscFunctionBegin;
1287   /* do nondiagonal part */
1288   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1289   /* do local part */
1290   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1291   /* add partial results together */
1292   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1293   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1298 {
1299   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1300   PetscErrorCode ierr;
1301 
1302   PetscFunctionBegin;
1303   /* do nondiagonal part */
1304   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1305   /* do local part */
1306   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1307   /* add partial results together */
1308   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1309   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 /*
1314   This only works correctly for square matrices where the subblock A->A is the
1315    diagonal block
1316 */
1317 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1318 {
1319   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1320   PetscErrorCode ierr;
1321 
1322   PetscFunctionBegin;
1323   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1324   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
1328 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1329 {
1330   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1335   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1340 {
1341   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1342   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1343   PetscErrorCode ierr;
1344   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1345   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1346   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1347 
1348   PetscFunctionBegin;
1349   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1350   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1351   mat->getrowactive = PETSC_TRUE;
1352 
1353   if (!mat->rowvalues && (idx || v)) {
1354     /*
1355         allocate enough space to hold information from the longest row.
1356     */
1357     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1358     PetscInt    max = 1,mbs = mat->mbs,tmp;
1359     for (i=0; i<mbs; i++) {
1360       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1361       if (max < tmp) max = tmp;
1362     }
1363     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1364   }
1365   lrow = row - brstart;
1366 
1367   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1368   if (!v)   {pvA = 0; pvB = 0;}
1369   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1370   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1371   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1372   nztot = nzA + nzB;
1373 
1374   cmap = mat->garray;
1375   if (v  || idx) {
1376     if (nztot) {
1377       /* Sort by increasing column numbers, assuming A and B already sorted */
1378       PetscInt imark = -1;
1379       if (v) {
1380         *v = v_p = mat->rowvalues;
1381         for (i=0; i<nzB; i++) {
1382           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1383           else break;
1384         }
1385         imark = i;
1386         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1387         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1388       }
1389       if (idx) {
1390         *idx = idx_p = mat->rowindices;
1391         if (imark > -1) {
1392           for (i=0; i<imark; i++) {
1393             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1394           }
1395         } else {
1396           for (i=0; i<nzB; i++) {
1397             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1398             else break;
1399           }
1400           imark = i;
1401         }
1402         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1403         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1404       }
1405     } else {
1406       if (idx) *idx = 0;
1407       if (v)   *v   = 0;
1408     }
1409   }
1410   *nz  = nztot;
1411   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1412   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1417 {
1418   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1419 
1420   PetscFunctionBegin;
1421   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1422   baij->getrowactive = PETSC_FALSE;
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1427 {
1428   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1429   PetscErrorCode ierr;
1430 
1431   PetscFunctionBegin;
1432   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1433   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1438 {
1439   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1440   Mat            A  = a->A,B = a->B;
1441   PetscErrorCode ierr;
1442   PetscLogDouble isend[5],irecv[5];
1443 
1444   PetscFunctionBegin;
1445   info->block_size = (PetscReal)matin->rmap->bs;
1446 
1447   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1448 
1449   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1450   isend[3] = info->memory;  isend[4] = info->mallocs;
1451 
1452   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1453 
1454   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1455   isend[3] += info->memory;  isend[4] += info->mallocs;
1456 
1457   if (flag == MAT_LOCAL) {
1458     info->nz_used      = isend[0];
1459     info->nz_allocated = isend[1];
1460     info->nz_unneeded  = isend[2];
1461     info->memory       = isend[3];
1462     info->mallocs      = isend[4];
1463   } else if (flag == MAT_GLOBAL_MAX) {
1464     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1465 
1466     info->nz_used      = irecv[0];
1467     info->nz_allocated = irecv[1];
1468     info->nz_unneeded  = irecv[2];
1469     info->memory       = irecv[3];
1470     info->mallocs      = irecv[4];
1471   } else if (flag == MAT_GLOBAL_SUM) {
1472     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1473 
1474     info->nz_used      = irecv[0];
1475     info->nz_allocated = irecv[1];
1476     info->nz_unneeded  = irecv[2];
1477     info->memory       = irecv[3];
1478     info->mallocs      = irecv[4];
1479   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1480   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1481   info->fill_ratio_needed = 0;
1482   info->factor_mallocs    = 0;
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1487 {
1488   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1489   PetscErrorCode ierr;
1490 
1491   PetscFunctionBegin;
1492   switch (op) {
1493   case MAT_NEW_NONZERO_LOCATIONS:
1494   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1495   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1496   case MAT_KEEP_NONZERO_PATTERN:
1497   case MAT_NEW_NONZERO_LOCATION_ERR:
1498     MatCheckPreallocated(A,1);
1499     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1500     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1501     break;
1502   case MAT_ROW_ORIENTED:
1503     MatCheckPreallocated(A,1);
1504     a->roworiented = flg;
1505 
1506     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1507     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1508     break;
1509   case MAT_NEW_DIAGONALS:
1510   case MAT_SORTED_FULL:
1511     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1512     break;
1513   case MAT_IGNORE_OFF_PROC_ENTRIES:
1514     a->donotstash = flg;
1515     break;
1516   case MAT_USE_HASH_TABLE:
1517     a->ht_flag = flg;
1518     a->ht_fact = 1.39;
1519     break;
1520   case MAT_SYMMETRIC:
1521   case MAT_STRUCTURALLY_SYMMETRIC:
1522   case MAT_HERMITIAN:
1523   case MAT_SUBMAT_SINGLEIS:
1524   case MAT_SYMMETRY_ETERNAL:
1525     MatCheckPreallocated(A,1);
1526     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1527     break;
1528   default:
1529     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1530   }
1531   PetscFunctionReturn(0);
1532 }
1533 
1534 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1535 {
1536   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1537   Mat_SeqBAIJ    *Aloc;
1538   Mat            B;
1539   PetscErrorCode ierr;
1540   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1541   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1542   MatScalar      *a;
1543 
1544   PetscFunctionBegin;
1545   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1546     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1547     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1548     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1549     /* Do not know preallocation information, but must set block size */
1550     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1551   } else {
1552     B = *matout;
1553   }
1554 
1555   /* copy over the A part */
1556   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1557   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1558   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1559 
1560   for (i=0; i<mbs; i++) {
1561     rvals[0] = bs*(baij->rstartbs + i);
1562     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1563     for (j=ai[i]; j<ai[i+1]; j++) {
1564       col = (baij->cstartbs+aj[j])*bs;
1565       for (k=0; k<bs; k++) {
1566         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1567 
1568         col++; a += bs;
1569       }
1570     }
1571   }
1572   /* copy over the B part */
1573   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1574   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1575   for (i=0; i<mbs; i++) {
1576     rvals[0] = bs*(baij->rstartbs + i);
1577     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1578     for (j=ai[i]; j<ai[i+1]; j++) {
1579       col = baij->garray[aj[j]]*bs;
1580       for (k=0; k<bs; k++) {
1581         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1582         col++;
1583         a += bs;
1584       }
1585     }
1586   }
1587   ierr = PetscFree(rvals);CHKERRQ(ierr);
1588   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1589   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1590 
1591   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1592   else {
1593     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1594   }
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1599 {
1600   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1601   Mat            a     = baij->A,b = baij->B;
1602   PetscErrorCode ierr;
1603   PetscInt       s1,s2,s3;
1604 
1605   PetscFunctionBegin;
1606   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1607   if (rr) {
1608     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1609     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1610     /* Overlap communication with computation. */
1611     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1612   }
1613   if (ll) {
1614     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1615     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1616     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1617   }
1618   /* scale  the diagonal block */
1619   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1620 
1621   if (rr) {
1622     /* Do a scatter end and then right scale the off-diagonal block */
1623     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1624     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1625   }
1626   PetscFunctionReturn(0);
1627 }
1628 
1629 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1630 {
1631   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1632   PetscInt      *lrows;
1633   PetscInt       r, len;
1634   PetscBool      cong;
1635   PetscErrorCode ierr;
1636 
1637   PetscFunctionBegin;
1638   /* get locally owned rows */
1639   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1640   /* fix right hand side if needed */
1641   if (x && b) {
1642     const PetscScalar *xx;
1643     PetscScalar       *bb;
1644 
1645     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1646     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1647     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1648     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1649     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1650   }
1651 
1652   /* actually zap the local rows */
1653   /*
1654         Zero the required rows. If the "diagonal block" of the matrix
1655      is square and the user wishes to set the diagonal we use separate
1656      code so that MatSetValues() is not called for each diagonal allocating
1657      new memory, thus calling lots of mallocs and slowing things down.
1658 
1659   */
1660   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1661   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1662   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1663   if ((diag != 0.0) && cong) {
1664     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1665   } else if (diag != 0.0) {
1666     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1667     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1668        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1669     for (r = 0; r < len; ++r) {
1670       const PetscInt row = lrows[r] + A->rmap->rstart;
1671       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1672     }
1673     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1674     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1675   } else {
1676     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1677   }
1678   ierr = PetscFree(lrows);CHKERRQ(ierr);
1679 
1680   /* only change matrix nonzero state if pattern was allowed to be changed */
1681   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1682     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1683     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1684   }
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1689 {
1690   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1691   PetscErrorCode    ierr;
1692   PetscMPIInt       n = A->rmap->n,p = 0;
1693   PetscInt          i,j,k,r,len = 0,row,col,count;
1694   PetscInt          *lrows,*owners = A->rmap->range;
1695   PetscSFNode       *rrows;
1696   PetscSF           sf;
1697   const PetscScalar *xx;
1698   PetscScalar       *bb,*mask;
1699   Vec               xmask,lmask;
1700   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1701   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1702   PetscScalar       *aa;
1703 
1704   PetscFunctionBegin;
1705   /* Create SF where leaves are input rows and roots are owned rows */
1706   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1707   for (r = 0; r < n; ++r) lrows[r] = -1;
1708   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1709   for (r = 0; r < N; ++r) {
1710     const PetscInt idx   = rows[r];
1711     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1712     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1713       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1714     }
1715     rrows[r].rank  = p;
1716     rrows[r].index = rows[r] - owners[p];
1717   }
1718   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1719   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1720   /* Collect flags for rows to be zeroed */
1721   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1722   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1723   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1724   /* Compress and put in row numbers */
1725   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1726   /* zero diagonal part of matrix */
1727   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1728   /* handle off diagonal part of matrix */
1729   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1730   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1731   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1732   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1733   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1734   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1735   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1736   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1737   if (x) {
1738     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1739     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1740     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1741     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1742   }
1743   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1744   /* remove zeroed rows of off diagonal matrix */
1745   for (i = 0; i < len; ++i) {
1746     row   = lrows[i];
1747     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1748     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1749     for (k = 0; k < count; ++k) {
1750       aa[0] = 0.0;
1751       aa   += bs;
1752     }
1753   }
1754   /* loop over all elements of off process part of matrix zeroing removed columns*/
1755   for (i = 0; i < l->B->rmap->N; ++i) {
1756     row = i/bs;
1757     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1758       for (k = 0; k < bs; ++k) {
1759         col = bs*baij->j[j] + k;
1760         if (PetscAbsScalar(mask[col])) {
1761           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1762           if (x) bb[i] -= aa[0]*xx[col];
1763           aa[0] = 0.0;
1764         }
1765       }
1766     }
1767   }
1768   if (x) {
1769     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1770     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1771   }
1772   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1773   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1774   ierr = PetscFree(lrows);CHKERRQ(ierr);
1775 
1776   /* only change matrix nonzero state if pattern was allowed to be changed */
1777   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1778     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1779     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1780   }
1781   PetscFunctionReturn(0);
1782 }
1783 
1784 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1785 {
1786   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1787   PetscErrorCode ierr;
1788 
1789   PetscFunctionBegin;
1790   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1795 
1796 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1797 {
1798   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1799   Mat            a,b,c,d;
1800   PetscBool      flg;
1801   PetscErrorCode ierr;
1802 
1803   PetscFunctionBegin;
1804   a = matA->A; b = matA->B;
1805   c = matB->A; d = matB->B;
1806 
1807   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1808   if (flg) {
1809     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1810   }
1811   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1816 {
1817   PetscErrorCode ierr;
1818   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1819   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1820 
1821   PetscFunctionBegin;
1822   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1823   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1824     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1825   } else {
1826     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1827     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1828   }
1829   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1830   PetscFunctionReturn(0);
1831 }
1832 
1833 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1834 {
1835   PetscErrorCode ierr;
1836 
1837   PetscFunctionBegin;
1838   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1843 {
1844   PetscErrorCode ierr;
1845   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1846   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1847   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1848 
1849   PetscFunctionBegin;
1850   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1855 {
1856   PetscErrorCode ierr;
1857   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1858   PetscBLASInt   bnz,one=1;
1859   Mat_SeqBAIJ    *x,*y;
1860   PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;
1861 
1862   PetscFunctionBegin;
1863   if (str == SAME_NONZERO_PATTERN) {
1864     PetscScalar alpha = a;
1865     x    = (Mat_SeqBAIJ*)xx->A->data;
1866     y    = (Mat_SeqBAIJ*)yy->A->data;
1867     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1868     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1869     x    = (Mat_SeqBAIJ*)xx->B->data;
1870     y    = (Mat_SeqBAIJ*)yy->B->data;
1871     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1872     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1873     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1874   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1875     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1876   } else {
1877     Mat      B;
1878     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1879     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1880     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1881     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1882     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1883     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1884     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1885     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
1886     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1887     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1888     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1889     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1890     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1891     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1892     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1893     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1894   }
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1899 {
1900   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1901   PetscErrorCode ierr;
1902 
1903   PetscFunctionBegin;
1904   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1905   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1910 {
1911   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1912   PetscErrorCode ierr;
1913 
1914   PetscFunctionBegin;
1915   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1916   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1921 {
1922   PetscErrorCode ierr;
1923   IS             iscol_local;
1924   PetscInt       csize;
1925 
1926   PetscFunctionBegin;
1927   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1928   if (call == MAT_REUSE_MATRIX) {
1929     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1930     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1931   } else {
1932     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1933   }
1934   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1935   if (call == MAT_INITIAL_MATRIX) {
1936     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1937     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1938   }
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 /*
1943   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1944   in local and then by concatenating the local matrices the end result.
1945   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1946   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1947 */
1948 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1949 {
1950   PetscErrorCode ierr;
1951   PetscMPIInt    rank,size;
1952   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1953   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1954   Mat            M,Mreuse;
1955   MatScalar      *vwork,*aa;
1956   MPI_Comm       comm;
1957   IS             isrow_new, iscol_new;
1958   Mat_SeqBAIJ    *aij;
1959 
1960   PetscFunctionBegin;
1961   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
1962   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1963   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1964   /* The compression and expansion should be avoided. Doesn't point
1965      out errors, might change the indices, hence buggey */
1966   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
1967   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
1968 
1969   if (call ==  MAT_REUSE_MATRIX) {
1970     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
1971     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1972     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
1973   } else {
1974     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
1975   }
1976   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
1977   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
1978   /*
1979       m - number of local rows
1980       n - number of columns (same on all processors)
1981       rstart - first row in new global matrix generated
1982   */
1983   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1984   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1985   m    = m/bs;
1986   n    = n/bs;
1987 
1988   if (call == MAT_INITIAL_MATRIX) {
1989     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1990     ii  = aij->i;
1991     jj  = aij->j;
1992 
1993     /*
1994         Determine the number of non-zeros in the diagonal and off-diagonal
1995         portions of the matrix in order to do correct preallocation
1996     */
1997 
1998     /* first get start and end of "diagonal" columns */
1999     if (csize == PETSC_DECIDE) {
2000       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2001       if (mglobal == n*bs) { /* square matrix */
2002         nlocal = m;
2003       } else {
2004         nlocal = n/size + ((n % size) > rank);
2005       }
2006     } else {
2007       nlocal = csize/bs;
2008     }
2009     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2010     rstart = rend - nlocal;
2011     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2012 
2013     /* next, compute all the lengths */
2014     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2015     for (i=0; i<m; i++) {
2016       jend = ii[i+1] - ii[i];
2017       olen = 0;
2018       dlen = 0;
2019       for (j=0; j<jend; j++) {
2020         if (*jj < rstart || *jj >= rend) olen++;
2021         else dlen++;
2022         jj++;
2023       }
2024       olens[i] = olen;
2025       dlens[i] = dlen;
2026     }
2027     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2028     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2029     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2030     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2031     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2032     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2033   } else {
2034     PetscInt ml,nl;
2035 
2036     M    = *newmat;
2037     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2038     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2039     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2040     /*
2041          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2042        rather than the slower MatSetValues().
2043     */
2044     M->was_assembled = PETSC_TRUE;
2045     M->assembled     = PETSC_FALSE;
2046   }
2047   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2048   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2049   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2050   ii   = aij->i;
2051   jj   = aij->j;
2052   aa   = aij->a;
2053   for (i=0; i<m; i++) {
2054     row   = rstart/bs + i;
2055     nz    = ii[i+1] - ii[i];
2056     cwork = jj;     jj += nz;
2057     vwork = aa;     aa += nz*bs*bs;
2058     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2059   }
2060 
2061   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2062   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2063   *newmat = M;
2064 
2065   /* save submatrix used in processor for next request */
2066   if (call ==  MAT_INITIAL_MATRIX) {
2067     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2068     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2069   }
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2074 {
2075   MPI_Comm       comm,pcomm;
2076   PetscInt       clocal_size,nrows;
2077   const PetscInt *rows;
2078   PetscMPIInt    size;
2079   IS             crowp,lcolp;
2080   PetscErrorCode ierr;
2081 
2082   PetscFunctionBegin;
2083   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2084   /* make a collective version of 'rowp' */
2085   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2086   if (pcomm==comm) {
2087     crowp = rowp;
2088   } else {
2089     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2090     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2091     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2092     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2093   }
2094   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2095   /* make a local version of 'colp' */
2096   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2097   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2098   if (size==1) {
2099     lcolp = colp;
2100   } else {
2101     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2102   }
2103   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2104   /* now we just get the submatrix */
2105   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2106   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2107   /* clean up */
2108   if (pcomm!=comm) {
2109     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2110   }
2111   if (size>1) {
2112     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2113   }
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2118 {
2119   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2120   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2121 
2122   PetscFunctionBegin;
2123   if (nghosts) *nghosts = B->nbs;
2124   if (ghosts) *ghosts = baij->garray;
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2129 {
2130   Mat            B;
2131   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2132   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2133   Mat_SeqAIJ     *b;
2134   PetscErrorCode ierr;
2135   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2136   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2137   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2138 
2139   PetscFunctionBegin;
2140   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2141   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2142 
2143   /* ----------------------------------------------------------------
2144      Tell every processor the number of nonzeros per row
2145   */
2146   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2147   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2148     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2149   }
2150   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2151   displs    = recvcounts + size;
2152   for (i=0; i<size; i++) {
2153     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2154     displs[i]     = A->rmap->range[i]/bs;
2155   }
2156 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2157   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2158 #else
2159   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2160   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2161 #endif
2162   /* ---------------------------------------------------------------
2163      Create the sequential matrix of the same type as the local block diagonal
2164   */
2165   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2166   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2167   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2168   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2169   b    = (Mat_SeqAIJ*)B->data;
2170 
2171   /*--------------------------------------------------------------------
2172     Copy my part of matrix column indices over
2173   */
2174   sendcount  = ad->nz + bd->nz;
2175   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2176   a_jsendbuf = ad->j;
2177   b_jsendbuf = bd->j;
2178   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2179   cnt        = 0;
2180   for (i=0; i<n; i++) {
2181 
2182     /* put in lower diagonal portion */
2183     m = bd->i[i+1] - bd->i[i];
2184     while (m > 0) {
2185       /* is it above diagonal (in bd (compressed) numbering) */
2186       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2187       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2188       m--;
2189     }
2190 
2191     /* put in diagonal portion */
2192     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2193       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2194     }
2195 
2196     /* put in upper diagonal portion */
2197     while (m-- > 0) {
2198       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2199     }
2200   }
2201   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2202 
2203   /*--------------------------------------------------------------------
2204     Gather all column indices to all processors
2205   */
2206   for (i=0; i<size; i++) {
2207     recvcounts[i] = 0;
2208     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2209       recvcounts[i] += lens[j];
2210     }
2211   }
2212   displs[0] = 0;
2213   for (i=1; i<size; i++) {
2214     displs[i] = displs[i-1] + recvcounts[i-1];
2215   }
2216 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2217   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2218 #else
2219   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2220 #endif
2221   /*--------------------------------------------------------------------
2222     Assemble the matrix into useable form (note numerical values not yet set)
2223   */
2224   /* set the b->ilen (length of each row) values */
2225   ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
2226   /* set the b->i indices */
2227   b->i[0] = 0;
2228   for (i=1; i<=A->rmap->N/bs; i++) {
2229     b->i[i] = b->i[i-1] + lens[i-1];
2230   }
2231   ierr = PetscFree(lens);CHKERRQ(ierr);
2232   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2233   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2234   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2235 
2236   if (A->symmetric) {
2237     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2238   } else if (A->hermitian) {
2239     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2240   } else if (A->structurally_symmetric) {
2241     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2242   }
2243   *newmat = B;
2244   PetscFunctionReturn(0);
2245 }
2246 
2247 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2248 {
2249   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2250   PetscErrorCode ierr;
2251   Vec            bb1 = 0;
2252 
2253   PetscFunctionBegin;
2254   if (flag == SOR_APPLY_UPPER) {
2255     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2256     PetscFunctionReturn(0);
2257   }
2258 
2259   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2260     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2261   }
2262 
2263   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2264     if (flag & SOR_ZERO_INITIAL_GUESS) {
2265       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2266       its--;
2267     }
2268 
2269     while (its--) {
2270       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2271       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2272 
2273       /* update rhs: bb1 = bb - B*x */
2274       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2275       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2276 
2277       /* local sweep */
2278       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2279     }
2280   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2281     if (flag & SOR_ZERO_INITIAL_GUESS) {
2282       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2283       its--;
2284     }
2285     while (its--) {
2286       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2287       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2288 
2289       /* update rhs: bb1 = bb - B*x */
2290       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2291       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2292 
2293       /* local sweep */
2294       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2295     }
2296   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2297     if (flag & SOR_ZERO_INITIAL_GUESS) {
2298       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2299       its--;
2300     }
2301     while (its--) {
2302       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2303       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2304 
2305       /* update rhs: bb1 = bb - B*x */
2306       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2307       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2308 
2309       /* local sweep */
2310       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2311     }
2312   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2313 
2314   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2319 {
2320   PetscErrorCode ierr;
2321   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2322   PetscInt       N,i,*garray = aij->garray;
2323   PetscInt       ib,jb,bs = A->rmap->bs;
2324   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2325   MatScalar      *a_val = a_aij->a;
2326   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2327   MatScalar      *b_val = b_aij->a;
2328   PetscReal      *work;
2329 
2330   PetscFunctionBegin;
2331   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2332   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2333   if (type == NORM_2) {
2334     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2335       for (jb=0; jb<bs; jb++) {
2336         for (ib=0; ib<bs; ib++) {
2337           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2338           a_val++;
2339         }
2340       }
2341     }
2342     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2343       for (jb=0; jb<bs; jb++) {
2344         for (ib=0; ib<bs; ib++) {
2345           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2346           b_val++;
2347         }
2348       }
2349     }
2350   } else if (type == NORM_1) {
2351     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2352       for (jb=0; jb<bs; jb++) {
2353         for (ib=0; ib<bs; ib++) {
2354           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2355           a_val++;
2356         }
2357       }
2358     }
2359     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2360       for (jb=0; jb<bs; jb++) {
2361        for (ib=0; ib<bs; ib++) {
2362           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2363           b_val++;
2364         }
2365       }
2366     }
2367   } else if (type == NORM_INFINITY) {
2368     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2369       for (jb=0; jb<bs; jb++) {
2370         for (ib=0; ib<bs; ib++) {
2371           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2372           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2373           a_val++;
2374         }
2375       }
2376     }
2377     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2378       for (jb=0; jb<bs; jb++) {
2379         for (ib=0; ib<bs; ib++) {
2380           int col = garray[b_aij->j[i]] * bs + jb;
2381           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2382           b_val++;
2383         }
2384       }
2385     }
2386   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2387   if (type == NORM_INFINITY) {
2388     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2389   } else {
2390     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2391   }
2392   ierr = PetscFree(work);CHKERRQ(ierr);
2393   if (type == NORM_2) {
2394     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2395   }
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2400 {
2401   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2402   PetscErrorCode ierr;
2403 
2404   PetscFunctionBegin;
2405   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2406   A->factorerrortype             = a->A->factorerrortype;
2407   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2408   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2413 {
2414   PetscErrorCode ierr;
2415   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2416   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2417 
2418   PetscFunctionBegin;
2419   if (!Y->preallocated) {
2420     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2421   } else if (!aij->nz) {
2422     PetscInt nonew = aij->nonew;
2423     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2424     aij->nonew = nonew;
2425   }
2426   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2431 {
2432   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2433   PetscErrorCode ierr;
2434 
2435   PetscFunctionBegin;
2436   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2437   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2438   if (d) {
2439     PetscInt rstart;
2440     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2441     *d += rstart/A->rmap->bs;
2442 
2443   }
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2448 {
2449   PetscFunctionBegin;
2450   *a = ((Mat_MPIBAIJ*)A->data)->A;
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 /* -------------------------------------------------------------------*/
2455 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2456                                        MatGetRow_MPIBAIJ,
2457                                        MatRestoreRow_MPIBAIJ,
2458                                        MatMult_MPIBAIJ,
2459                                 /* 4*/ MatMultAdd_MPIBAIJ,
2460                                        MatMultTranspose_MPIBAIJ,
2461                                        MatMultTransposeAdd_MPIBAIJ,
2462                                        0,
2463                                        0,
2464                                        0,
2465                                 /*10*/ 0,
2466                                        0,
2467                                        0,
2468                                        MatSOR_MPIBAIJ,
2469                                        MatTranspose_MPIBAIJ,
2470                                 /*15*/ MatGetInfo_MPIBAIJ,
2471                                        MatEqual_MPIBAIJ,
2472                                        MatGetDiagonal_MPIBAIJ,
2473                                        MatDiagonalScale_MPIBAIJ,
2474                                        MatNorm_MPIBAIJ,
2475                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2476                                        MatAssemblyEnd_MPIBAIJ,
2477                                        MatSetOption_MPIBAIJ,
2478                                        MatZeroEntries_MPIBAIJ,
2479                                 /*24*/ MatZeroRows_MPIBAIJ,
2480                                        0,
2481                                        0,
2482                                        0,
2483                                        0,
2484                                 /*29*/ MatSetUp_MPIBAIJ,
2485                                        0,
2486                                        0,
2487                                        MatGetDiagonalBlock_MPIBAIJ,
2488                                        0,
2489                                 /*34*/ MatDuplicate_MPIBAIJ,
2490                                        0,
2491                                        0,
2492                                        0,
2493                                        0,
2494                                 /*39*/ MatAXPY_MPIBAIJ,
2495                                        MatCreateSubMatrices_MPIBAIJ,
2496                                        MatIncreaseOverlap_MPIBAIJ,
2497                                        MatGetValues_MPIBAIJ,
2498                                        MatCopy_MPIBAIJ,
2499                                 /*44*/ 0,
2500                                        MatScale_MPIBAIJ,
2501                                        MatShift_MPIBAIJ,
2502                                        0,
2503                                        MatZeroRowsColumns_MPIBAIJ,
2504                                 /*49*/ 0,
2505                                        0,
2506                                        0,
2507                                        0,
2508                                        0,
2509                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2510                                        0,
2511                                        MatSetUnfactored_MPIBAIJ,
2512                                        MatPermute_MPIBAIJ,
2513                                        MatSetValuesBlocked_MPIBAIJ,
2514                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2515                                        MatDestroy_MPIBAIJ,
2516                                        MatView_MPIBAIJ,
2517                                        0,
2518                                        0,
2519                                 /*64*/ 0,
2520                                        0,
2521                                        0,
2522                                        0,
2523                                        0,
2524                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2525                                        0,
2526                                        0,
2527                                        0,
2528                                        0,
2529                                 /*74*/ 0,
2530                                        MatFDColoringApply_BAIJ,
2531                                        0,
2532                                        0,
2533                                        0,
2534                                 /*79*/ 0,
2535                                        0,
2536                                        0,
2537                                        0,
2538                                        MatLoad_MPIBAIJ,
2539                                 /*84*/ 0,
2540                                        0,
2541                                        0,
2542                                        0,
2543                                        0,
2544                                 /*89*/ 0,
2545                                        0,
2546                                        0,
2547                                        0,
2548                                        0,
2549                                 /*94*/ 0,
2550                                        0,
2551                                        0,
2552                                        0,
2553                                        0,
2554                                 /*99*/ 0,
2555                                        0,
2556                                        0,
2557                                        0,
2558                                        0,
2559                                 /*104*/0,
2560                                        MatRealPart_MPIBAIJ,
2561                                        MatImaginaryPart_MPIBAIJ,
2562                                        0,
2563                                        0,
2564                                 /*109*/0,
2565                                        0,
2566                                        0,
2567                                        0,
2568                                        MatMissingDiagonal_MPIBAIJ,
2569                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2570                                        0,
2571                                        MatGetGhosts_MPIBAIJ,
2572                                        0,
2573                                        0,
2574                                 /*119*/0,
2575                                        0,
2576                                        0,
2577                                        0,
2578                                        MatGetMultiProcBlock_MPIBAIJ,
2579                                 /*124*/0,
2580                                        MatGetColumnNorms_MPIBAIJ,
2581                                        MatInvertBlockDiagonal_MPIBAIJ,
2582                                        0,
2583                                        0,
2584                                /*129*/ 0,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                /*134*/ 0,
2590                                        0,
2591                                        0,
2592                                        0,
2593                                        0,
2594                                /*139*/ MatSetBlockSizes_Default,
2595                                        0,
2596                                        0,
2597                                        MatFDColoringSetUp_MPIXAIJ,
2598                                        0,
2599                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2600 };
2601 
2602 
2603 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2604 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2605 
2606 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2607 {
2608   PetscInt       m,rstart,cstart,cend;
2609   PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2610   const PetscInt *JJ    =0;
2611   PetscScalar    *values=0;
2612   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2613   PetscErrorCode ierr;
2614   PetscBool      nooffprocentries;
2615 
2616   PetscFunctionBegin;
2617   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2618   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2619   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2620   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2621   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2622   m      = B->rmap->n/bs;
2623   rstart = B->rmap->rstart/bs;
2624   cstart = B->cmap->rstart/bs;
2625   cend   = B->cmap->rend/bs;
2626 
2627   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2628   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2629   for (i=0; i<m; i++) {
2630     nz = ii[i+1] - ii[i];
2631     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2632     nz_max = PetscMax(nz_max,nz);
2633     dlen   = 0;
2634     olen   = 0;
2635     JJ     = jj + ii[i];
2636     for (j=0; j<nz; j++) {
2637       if (*JJ < cstart || *JJ >= cend) olen++;
2638       else dlen++;
2639       JJ++;
2640     }
2641     d_nnz[i] = dlen;
2642     o_nnz[i] = olen;
2643   }
2644   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2645   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2646 
2647   values = (PetscScalar*)V;
2648   if (!values) {
2649     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2650   }
2651   for (i=0; i<m; i++) {
2652     PetscInt          row    = i + rstart;
2653     PetscInt          ncols  = ii[i+1] - ii[i];
2654     const PetscInt    *icols = jj + ii[i];
2655     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2656       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2657       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2658     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2659       PetscInt j;
2660       for (j=0; j<ncols; j++) {
2661         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2662         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2663       }
2664     }
2665   }
2666 
2667   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2668   nooffprocentries    = B->nooffprocentries;
2669   B->nooffprocentries = PETSC_TRUE;
2670   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2671   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2672   B->nooffprocentries = nooffprocentries;
2673 
2674   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2675   PetscFunctionReturn(0);
2676 }
2677 
2678 /*@C
2679    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2680 
2681    Collective
2682 
2683    Input Parameters:
2684 +  B - the matrix
2685 .  bs - the block size
2686 .  i - the indices into j for the start of each local row (starts with zero)
2687 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2688 -  v - optional values in the matrix
2689 
2690    Level: advanced
2691 
2692    Notes:
2693     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2694    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2695    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2696    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2697    block column and the second index is over columns within a block.
2698 
2699    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2700 
2701 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2702 @*/
2703 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2704 {
2705   PetscErrorCode ierr;
2706 
2707   PetscFunctionBegin;
2708   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2709   PetscValidType(B,1);
2710   PetscValidLogicalCollectiveInt(B,bs,2);
2711   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2712   PetscFunctionReturn(0);
2713 }
2714 
2715 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2716 {
2717   Mat_MPIBAIJ    *b;
2718   PetscErrorCode ierr;
2719   PetscInt       i;
2720   PetscMPIInt    size;
2721 
2722   PetscFunctionBegin;
2723   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2724   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2725   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2726   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2727 
2728   if (d_nnz) {
2729     for (i=0; i<B->rmap->n/bs; i++) {
2730       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2731     }
2732   }
2733   if (o_nnz) {
2734     for (i=0; i<B->rmap->n/bs; i++) {
2735       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2736     }
2737   }
2738 
2739   b      = (Mat_MPIBAIJ*)B->data;
2740   b->bs2 = bs*bs;
2741   b->mbs = B->rmap->n/bs;
2742   b->nbs = B->cmap->n/bs;
2743   b->Mbs = B->rmap->N/bs;
2744   b->Nbs = B->cmap->N/bs;
2745 
2746   for (i=0; i<=b->size; i++) {
2747     b->rangebs[i] = B->rmap->range[i]/bs;
2748   }
2749   b->rstartbs = B->rmap->rstart/bs;
2750   b->rendbs   = B->rmap->rend/bs;
2751   b->cstartbs = B->cmap->rstart/bs;
2752   b->cendbs   = B->cmap->rend/bs;
2753 
2754 #if defined(PETSC_USE_CTABLE)
2755   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2756 #else
2757   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2758 #endif
2759   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2760   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2761   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2762 
2763   /* Because the B will have been resized we simply destroy it and create a new one each time */
2764   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2765   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2766   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2767   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2768   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2769   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2770 
2771   if (!B->preallocated) {
2772     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2773     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2774     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2775     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2776     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2777   }
2778 
2779   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2780   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2781   B->preallocated  = PETSC_TRUE;
2782   B->was_assembled = PETSC_FALSE;
2783   B->assembled     = PETSC_FALSE;
2784   PetscFunctionReturn(0);
2785 }
2786 
2787 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2788 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2789 
2790 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2791 {
2792   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2793   PetscErrorCode ierr;
2794   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2795   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2796   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2797 
2798   PetscFunctionBegin;
2799   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
2800   ii[0] = 0;
2801   for (i=0; i<M; i++) {
2802     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2803     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2804     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2805     /* remove one from count of matrix has diagonal */
2806     for (j=id[i]; j<id[i+1]; j++) {
2807       if (jd[j] == i) {ii[i+1]--;break;}
2808     }
2809   }
2810   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2811   cnt  = 0;
2812   for (i=0; i<M; i++) {
2813     for (j=io[i]; j<io[i+1]; j++) {
2814       if (garray[jo[j]] > rstart) break;
2815       jj[cnt++] = garray[jo[j]];
2816     }
2817     for (k=id[i]; k<id[i+1]; k++) {
2818       if (jd[k] != i) {
2819         jj[cnt++] = rstart + jd[k];
2820       }
2821     }
2822     for (; j<io[i+1]; j++) {
2823       jj[cnt++] = garray[jo[j]];
2824     }
2825   }
2826   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2827   PetscFunctionReturn(0);
2828 }
2829 
2830 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2831 
2832 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2833 
2834 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2835 {
2836   PetscErrorCode ierr;
2837   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2838   Mat            B;
2839   Mat_MPIAIJ     *b;
2840 
2841   PetscFunctionBegin;
2842   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2843 
2844   if (reuse == MAT_REUSE_MATRIX) {
2845     B = *newmat;
2846   } else {
2847     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2848     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2849     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2850     ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2851     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2852     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2853   }
2854   b = (Mat_MPIAIJ*) B->data;
2855 
2856   if (reuse == MAT_REUSE_MATRIX) {
2857     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
2858     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
2859   } else {
2860     ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2861     ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2862     ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2863     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2864     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2865     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2866     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2867   }
2868   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2869   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2870 
2871   if (reuse == MAT_INPLACE_MATRIX) {
2872     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
2873   } else {
2874    *newmat = B;
2875   }
2876   PetscFunctionReturn(0);
2877 }
2878 
2879 /*MC
2880    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2881 
2882    Options Database Keys:
2883 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2884 . -mat_block_size <bs> - set the blocksize used to store the matrix
2885 - -mat_use_hash_table <fact>
2886 
2887    Level: beginner
2888 
2889    Notes:
2890     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
2891     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2892 
2893 .seealso: MatCreateBAIJ
2894 M*/
2895 
2896 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2897 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
2898 
2899 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2900 {
2901   Mat_MPIBAIJ    *b;
2902   PetscErrorCode ierr;
2903   PetscBool      flg = PETSC_FALSE;
2904 
2905   PetscFunctionBegin;
2906   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2907   B->data = (void*)b;
2908 
2909   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2910   B->assembled = PETSC_FALSE;
2911 
2912   B->insertmode = NOT_SET_VALUES;
2913   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2914   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2915 
2916   /* build local table of row and column ownerships */
2917   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
2918 
2919   /* build cache for off array entries formed */
2920   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2921 
2922   b->donotstash  = PETSC_FALSE;
2923   b->colmap      = NULL;
2924   b->garray      = NULL;
2925   b->roworiented = PETSC_TRUE;
2926 
2927   /* stuff used in block assembly */
2928   b->barray = 0;
2929 
2930   /* stuff used for matrix vector multiply */
2931   b->lvec  = 0;
2932   b->Mvctx = 0;
2933 
2934   /* stuff for MatGetRow() */
2935   b->rowindices   = 0;
2936   b->rowvalues    = 0;
2937   b->getrowactive = PETSC_FALSE;
2938 
2939   /* hash table stuff */
2940   b->ht           = 0;
2941   b->hd           = 0;
2942   b->ht_size      = 0;
2943   b->ht_flag      = PETSC_FALSE;
2944   b->ht_fact      = 0;
2945   b->ht_total_ct  = 0;
2946   b->ht_insert_ct = 0;
2947 
2948   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2949   b->ijonly = PETSC_FALSE;
2950 
2951 
2952   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
2953   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
2954   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
2955 #if defined(PETSC_HAVE_HYPRE)
2956   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
2957 #endif
2958   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2959   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2960   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2961   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2962   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2963   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2964   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
2965   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
2966   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2967 
2968   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2969   ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
2970   if (flg) {
2971     PetscReal fact = 1.39;
2972     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2973     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2974     if (fact <= 1.0) fact = 1.39;
2975     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2976     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2977   }
2978   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2979   PetscFunctionReturn(0);
2980 }
2981 
2982 /*MC
2983    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2984 
2985    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2986    and MATMPIBAIJ otherwise.
2987 
2988    Options Database Keys:
2989 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2990 
2991   Level: beginner
2992 
2993 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2994 M*/
2995 
2996 /*@C
2997    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2998    (block compressed row).  For good matrix assembly performance
2999    the user should preallocate the matrix storage by setting the parameters
3000    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3001    performance can be increased by more than a factor of 50.
3002 
3003    Collective on Mat
3004 
3005    Input Parameters:
3006 +  B - the matrix
3007 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3008           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3009 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3010            submatrix  (same for all local rows)
3011 .  d_nnz - array containing the number of block nonzeros in the various block rows
3012            of the in diagonal portion of the local (possibly different for each block
3013            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3014            set it even if it is zero.
3015 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3016            submatrix (same for all local rows).
3017 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3018            off-diagonal portion of the local submatrix (possibly different for
3019            each block row) or NULL.
3020 
3021    If the *_nnz parameter is given then the *_nz parameter is ignored
3022 
3023    Options Database Keys:
3024 +   -mat_block_size - size of the blocks to use
3025 -   -mat_use_hash_table <fact>
3026 
3027    Notes:
3028    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3029    than it must be used on all processors that share the object for that argument.
3030 
3031    Storage Information:
3032    For a square global matrix we define each processor's diagonal portion
3033    to be its local rows and the corresponding columns (a square submatrix);
3034    each processor's off-diagonal portion encompasses the remainder of the
3035    local matrix (a rectangular submatrix).
3036 
3037    The user can specify preallocated storage for the diagonal part of
3038    the local submatrix with either d_nz or d_nnz (not both).  Set
3039    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3040    memory allocation.  Likewise, specify preallocated storage for the
3041    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3042 
3043    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3044    the figure below we depict these three local rows and all columns (0-11).
3045 
3046 .vb
3047            0 1 2 3 4 5 6 7 8 9 10 11
3048           --------------------------
3049    row 3  |o o o d d d o o o o  o  o
3050    row 4  |o o o d d d o o o o  o  o
3051    row 5  |o o o d d d o o o o  o  o
3052           --------------------------
3053 .ve
3054 
3055    Thus, any entries in the d locations are stored in the d (diagonal)
3056    submatrix, and any entries in the o locations are stored in the
3057    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3058    stored simply in the MATSEQBAIJ format for compressed row storage.
3059 
3060    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3061    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3062    In general, for PDE problems in which most nonzeros are near the diagonal,
3063    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3064    or you will get TERRIBLE performance; see the users' manual chapter on
3065    matrices.
3066 
3067    You can call MatGetInfo() to get information on how effective the preallocation was;
3068    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3069    You can also run with the option -info and look for messages with the string
3070    malloc in them to see if additional memory allocation was needed.
3071 
3072    Level: intermediate
3073 
3074 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3075 @*/
3076 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3077 {
3078   PetscErrorCode ierr;
3079 
3080   PetscFunctionBegin;
3081   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3082   PetscValidType(B,1);
3083   PetscValidLogicalCollectiveInt(B,bs,2);
3084   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3085   PetscFunctionReturn(0);
3086 }
3087 
3088 /*@C
3089    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3090    (block compressed row).  For good matrix assembly performance
3091    the user should preallocate the matrix storage by setting the parameters
3092    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3093    performance can be increased by more than a factor of 50.
3094 
3095    Collective
3096 
3097    Input Parameters:
3098 +  comm - MPI communicator
3099 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3100           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3101 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3102            This value should be the same as the local size used in creating the
3103            y vector for the matrix-vector product y = Ax.
3104 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3105            This value should be the same as the local size used in creating the
3106            x vector for the matrix-vector product y = Ax.
3107 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3108 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3109 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3110            submatrix  (same for all local rows)
3111 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3112            of the in diagonal portion of the local (possibly different for each block
3113            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3114            and set it even if it is zero.
3115 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3116            submatrix (same for all local rows).
3117 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3118            off-diagonal portion of the local submatrix (possibly different for
3119            each block row) or NULL.
3120 
3121    Output Parameter:
3122 .  A - the matrix
3123 
3124    Options Database Keys:
3125 +   -mat_block_size - size of the blocks to use
3126 -   -mat_use_hash_table <fact>
3127 
3128    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3129    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3130    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3131 
3132    Notes:
3133    If the *_nnz parameter is given then the *_nz parameter is ignored
3134 
3135    A nonzero block is any block that as 1 or more nonzeros in it
3136 
3137    The user MUST specify either the local or global matrix dimensions
3138    (possibly both).
3139 
3140    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3141    than it must be used on all processors that share the object for that argument.
3142 
3143    Storage Information:
3144    For a square global matrix we define each processor's diagonal portion
3145    to be its local rows and the corresponding columns (a square submatrix);
3146    each processor's off-diagonal portion encompasses the remainder of the
3147    local matrix (a rectangular submatrix).
3148 
3149    The user can specify preallocated storage for the diagonal part of
3150    the local submatrix with either d_nz or d_nnz (not both).  Set
3151    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3152    memory allocation.  Likewise, specify preallocated storage for the
3153    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3154 
3155    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3156    the figure below we depict these three local rows and all columns (0-11).
3157 
3158 .vb
3159            0 1 2 3 4 5 6 7 8 9 10 11
3160           --------------------------
3161    row 3  |o o o d d d o o o o  o  o
3162    row 4  |o o o d d d o o o o  o  o
3163    row 5  |o o o d d d o o o o  o  o
3164           --------------------------
3165 .ve
3166 
3167    Thus, any entries in the d locations are stored in the d (diagonal)
3168    submatrix, and any entries in the o locations are stored in the
3169    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3170    stored simply in the MATSEQBAIJ format for compressed row storage.
3171 
3172    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3173    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3174    In general, for PDE problems in which most nonzeros are near the diagonal,
3175    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3176    or you will get TERRIBLE performance; see the users' manual chapter on
3177    matrices.
3178 
3179    Level: intermediate
3180 
3181 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3182 @*/
3183 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3184 {
3185   PetscErrorCode ierr;
3186   PetscMPIInt    size;
3187 
3188   PetscFunctionBegin;
3189   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3190   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3191   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3192   if (size > 1) {
3193     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3194     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3195   } else {
3196     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3197     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3198   }
3199   PetscFunctionReturn(0);
3200 }
3201 
3202 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3203 {
3204   Mat            mat;
3205   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3206   PetscErrorCode ierr;
3207   PetscInt       len=0;
3208 
3209   PetscFunctionBegin;
3210   *newmat = 0;
3211   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3212   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3213   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3214 
3215   mat->factortype   = matin->factortype;
3216   mat->preallocated = PETSC_TRUE;
3217   mat->assembled    = PETSC_TRUE;
3218   mat->insertmode   = NOT_SET_VALUES;
3219 
3220   a             = (Mat_MPIBAIJ*)mat->data;
3221   mat->rmap->bs = matin->rmap->bs;
3222   a->bs2        = oldmat->bs2;
3223   a->mbs        = oldmat->mbs;
3224   a->nbs        = oldmat->nbs;
3225   a->Mbs        = oldmat->Mbs;
3226   a->Nbs        = oldmat->Nbs;
3227 
3228   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3229   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3230 
3231   a->size         = oldmat->size;
3232   a->rank         = oldmat->rank;
3233   a->donotstash   = oldmat->donotstash;
3234   a->roworiented  = oldmat->roworiented;
3235   a->rowindices   = 0;
3236   a->rowvalues    = 0;
3237   a->getrowactive = PETSC_FALSE;
3238   a->barray       = 0;
3239   a->rstartbs     = oldmat->rstartbs;
3240   a->rendbs       = oldmat->rendbs;
3241   a->cstartbs     = oldmat->cstartbs;
3242   a->cendbs       = oldmat->cendbs;
3243 
3244   /* hash table stuff */
3245   a->ht           = 0;
3246   a->hd           = 0;
3247   a->ht_size      = 0;
3248   a->ht_flag      = oldmat->ht_flag;
3249   a->ht_fact      = oldmat->ht_fact;
3250   a->ht_total_ct  = 0;
3251   a->ht_insert_ct = 0;
3252 
3253   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
3254   if (oldmat->colmap) {
3255 #if defined(PETSC_USE_CTABLE)
3256     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3257 #else
3258     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
3259     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3260     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
3261 #endif
3262   } else a->colmap = 0;
3263 
3264   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3265     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3266     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3267     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
3268   } else a->garray = 0;
3269 
3270   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3271   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3272   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3273   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3274   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3275 
3276   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3277   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3278   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3279   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3280   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3281   *newmat = mat;
3282   PetscFunctionReturn(0);
3283 }
3284 
3285 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
3286 {
3287   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3288   PetscInt       *rowidxs,*colidxs,rs,cs,ce;
3289   PetscScalar    *matvals;
3290   PetscErrorCode ierr;
3291 
3292   PetscFunctionBegin;
3293   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3294 
3295   /* read in matrix header */
3296   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3297   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3298   M  = header[1]; N = header[2]; nz = header[3];
3299   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3300   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3301   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
3302 
3303   /* set block sizes from the viewer's .info file */
3304   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3305   /* set global sizes if not set already */
3306   if (mat->rmap->N < 0) mat->rmap->N = M;
3307   if (mat->cmap->N < 0) mat->cmap->N = N;
3308   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3309   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3310 
3311   /* check if the matrix sizes are correct */
3312   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3313   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3314   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3315   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3316   ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);
3317   ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);
3318   mbs = m/bs; nbs = n/bs;
3319 
3320   /* read in row lengths and build row indices */
3321   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3322   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3323   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3324   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3325   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3326 
3327   /* read in column indices and matrix values */
3328   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3329   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3330   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3331 
3332   { /* preallocate matrix storage */
3333     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3334     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3335     PetscBool  done;
3336     PetscInt   *d_nnz,*o_nnz;
3337 
3338     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3339     ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
3340     ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
3341     for (i=0; i<mbs; i++) {
3342       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3343       ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
3344       for (k=0; k<bs; k++)
3345         for (j=rowidxs[bs*i+k]; j<rowidxs[bs*i+k+1]; j++)
3346           if (colidxs[j] >= cs && colidxs[j] < ce) {
3347             if (!PetscBTLookupSet(bt,(colidxs[j]-cs)/bs)) d_nnz[i]++;
3348           } else {
3349             ierr = PetscHSetIQueryAdd(ht,colidxs[j]/bs,&done);CHKERRQ(ierr);
3350             if (done) o_nnz[i]++;
3351           }
3352     }
3353     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3354     ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
3355     ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3356     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3357   }
3358 
3359   /* store matrix values */
3360   for (i=0; i<m; i++) {
3361     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
3362     ierr = MatSetValues_MPIBAIJ(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3363   }
3364 
3365   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3366   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3367   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3368   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3369   PetscFunctionReturn(0);
3370 }
3371 
3372 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3373 {
3374   PetscErrorCode ierr;
3375   PetscBool      isbinary;
3376 
3377   PetscFunctionBegin;
3378   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3379   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3380   ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3381   PetscFunctionReturn(0);
3382 }
3383 
3384 /*@
3385    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3386 
3387    Input Parameters:
3388 +  mat  - the matrix
3389 -  fact - factor
3390 
3391    Not Collective, each process can use a different factor
3392 
3393    Level: advanced
3394 
3395   Notes:
3396    This can also be set by the command line option: -mat_use_hash_table <fact>
3397 
3398 .seealso: MatSetOption()
3399 @*/
3400 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3401 {
3402   PetscErrorCode ierr;
3403 
3404   PetscFunctionBegin;
3405   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3406   PetscFunctionReturn(0);
3407 }
3408 
3409 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3410 {
3411   Mat_MPIBAIJ *baij;
3412 
3413   PetscFunctionBegin;
3414   baij          = (Mat_MPIBAIJ*)mat->data;
3415   baij->ht_fact = fact;
3416   PetscFunctionReturn(0);
3417 }
3418 
3419 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3420 {
3421   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3422   PetscBool      flg;
3423   PetscErrorCode ierr;
3424 
3425   PetscFunctionBegin;
3426   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3427   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3428   if (Ad)     *Ad     = a->A;
3429   if (Ao)     *Ao     = a->B;
3430   if (colmap) *colmap = a->garray;
3431   PetscFunctionReturn(0);
3432 }
3433 
3434 /*
3435     Special version for direct calls from Fortran (to eliminate two function call overheads
3436 */
3437 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3438 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3439 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3440 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3441 #endif
3442 
3443 /*@C
3444   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3445 
3446   Collective on Mat
3447 
3448   Input Parameters:
3449 + mat - the matrix
3450 . min - number of input rows
3451 . im - input rows
3452 . nin - number of input columns
3453 . in - input columns
3454 . v - numerical values input
3455 - addvin - INSERT_VALUES or ADD_VALUES
3456 
3457   Notes:
3458     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3459 
3460   Level: advanced
3461 
3462 .seealso:   MatSetValuesBlocked()
3463 @*/
3464 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3465 {
3466   /* convert input arguments to C version */
3467   Mat        mat  = *matin;
3468   PetscInt   m    = *min, n = *nin;
3469   InsertMode addv = *addvin;
3470 
3471   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3472   const MatScalar *value;
3473   MatScalar       *barray     = baij->barray;
3474   PetscBool       roworiented = baij->roworiented;
3475   PetscErrorCode  ierr;
3476   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3477   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3478   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3479 
3480   PetscFunctionBegin;
3481   /* tasks normally handled by MatSetValuesBlocked() */
3482   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3483 #if defined(PETSC_USE_DEBUG)
3484   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3485   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3486 #endif
3487   if (mat->assembled) {
3488     mat->was_assembled = PETSC_TRUE;
3489     mat->assembled     = PETSC_FALSE;
3490   }
3491   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3492 
3493 
3494   if (!barray) {
3495     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3496     baij->barray = barray;
3497   }
3498 
3499   if (roworiented) stepval = (n-1)*bs;
3500   else stepval = (m-1)*bs;
3501 
3502   for (i=0; i<m; i++) {
3503     if (im[i] < 0) continue;
3504 #if defined(PETSC_USE_DEBUG)
3505     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3506 #endif
3507     if (im[i] >= rstart && im[i] < rend) {
3508       row = im[i] - rstart;
3509       for (j=0; j<n; j++) {
3510         /* If NumCol = 1 then a copy is not required */
3511         if ((roworiented) && (n == 1)) {
3512           barray = (MatScalar*)v + i*bs2;
3513         } else if ((!roworiented) && (m == 1)) {
3514           barray = (MatScalar*)v + j*bs2;
3515         } else { /* Here a copy is required */
3516           if (roworiented) {
3517             value = v + i*(stepval+bs)*bs + j*bs;
3518           } else {
3519             value = v + j*(stepval+bs)*bs + i*bs;
3520           }
3521           for (ii=0; ii<bs; ii++,value+=stepval) {
3522             for (jj=0; jj<bs; jj++) {
3523               *barray++ = *value++;
3524             }
3525           }
3526           barray -=bs2;
3527         }
3528 
3529         if (in[j] >= cstart && in[j] < cend) {
3530           col  = in[j] - cstart;
3531           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3532         } else if (in[j] < 0) continue;
3533 #if defined(PETSC_USE_DEBUG)
3534         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3535 #endif
3536         else {
3537           if (mat->was_assembled) {
3538             if (!baij->colmap) {
3539               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3540             }
3541 
3542 #if defined(PETSC_USE_DEBUG)
3543 #if defined(PETSC_USE_CTABLE)
3544             { PetscInt data;
3545               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3546               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3547             }
3548 #else
3549             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3550 #endif
3551 #endif
3552 #if defined(PETSC_USE_CTABLE)
3553             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3554             col  = (col - 1)/bs;
3555 #else
3556             col = (baij->colmap[in[j]] - 1)/bs;
3557 #endif
3558             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3559               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3560               col  =  in[j];
3561             }
3562           } else col = in[j];
3563           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3564         }
3565       }
3566     } else {
3567       if (!baij->donotstash) {
3568         if (roworiented) {
3569           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3570         } else {
3571           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3572         }
3573       }
3574     }
3575   }
3576 
3577   /* task normally handled by MatSetValuesBlocked() */
3578   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3579   PetscFunctionReturn(0);
3580 }
3581 
3582 /*@
3583      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3584          CSR format the local rows.
3585 
3586    Collective
3587 
3588    Input Parameters:
3589 +  comm - MPI communicator
3590 .  bs - the block size, only a block size of 1 is supported
3591 .  m - number of local rows (Cannot be PETSC_DECIDE)
3592 .  n - This value should be the same as the local size used in creating the
3593        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3594        calculated if N is given) For square matrices n is almost always m.
3595 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3596 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3597 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3598 .   j - column indices
3599 -   a - matrix values
3600 
3601    Output Parameter:
3602 .   mat - the matrix
3603 
3604    Level: intermediate
3605 
3606    Notes:
3607        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3608      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3609      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3610 
3611      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3612      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3613      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3614      with column-major ordering within blocks.
3615 
3616        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3617 
3618 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3619           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3620 @*/
3621 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3622 {
3623   PetscErrorCode ierr;
3624 
3625   PetscFunctionBegin;
3626   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3627   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3628   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3629   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3630   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3631   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
3632   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3633   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3634   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3635   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3636   PetscFunctionReturn(0);
3637 }
3638 
3639 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3640 {
3641   PetscErrorCode ierr;
3642   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3643   PetscInt       *indx;
3644   PetscScalar    *values;
3645 
3646   PetscFunctionBegin;
3647   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3648   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3649     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3650     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3651     PetscInt       *bindx,rmax=a->rmax,j;
3652     PetscMPIInt    rank,size;
3653 
3654     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3655     mbs = m/bs; Nbs = N/cbs;
3656     if (n == PETSC_DECIDE) {
3657       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3658     }
3659     nbs = n/cbs;
3660 
3661     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3662     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3663 
3664     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3665     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3666     if (rank == size-1) {
3667       /* Check sum(nbs) = Nbs */
3668       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3669     }
3670 
3671     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3672     for (i=0; i<mbs; i++) {
3673       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3674       nnz = nnz/bs;
3675       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3676       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3677       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3678     }
3679     ierr = PetscFree(bindx);CHKERRQ(ierr);
3680 
3681     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3682     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3683     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3684     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3685     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3686     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3687     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3688   }
3689 
3690   /* numeric phase */
3691   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3692   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3693 
3694   for (i=0; i<m; i++) {
3695     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3696     Ii   = i + rstart;
3697     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3698     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3699   }
3700   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3701   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3702   PetscFunctionReturn(0);
3703 }
3704