xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 880c6e6abb7db98613f1e9493714f5b232801d20)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
9 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
10 {
11   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
12   PetscErrorCode ierr;
13   PetscInt       i,*idxb = 0;
14   PetscScalar    *va,*vb;
15   Vec            vtmp;
16 
17   PetscFunctionBegin;
18   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
19   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
20   if (idx) {
21     for (i=0; i<A->rmap->n; i++) {
22       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
23     }
24   }
25 
26   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
27   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
28   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
29   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
30 
31   for (i=0; i<A->rmap->n; i++) {
32     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
33       va[i] = vb[i];
34       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
35     }
36   }
37 
38   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
39   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
40   ierr = PetscFree(idxb);CHKERRQ(ierr);
41   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
47 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
48 {
49   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
60 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
61 {
62   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
63   PetscErrorCode ierr;
64 
65   PetscFunctionBegin;
66   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
67   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
68   PetscFunctionReturn(0);
69 }
70 
71 /*
72      Local utility routine that creates a mapping from the global column
73    number to the local number in the off-diagonal part of the local
74    storage of the matrix.  This is done in a non scalable way since the
75    length of colmap equals the global matrix length.
76 */
77 #undef __FUNCT__
78 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
79 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
80 {
81   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
82   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
83   PetscErrorCode ierr;
84   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
85 
86   PetscFunctionBegin;
87 #if defined(PETSC_USE_CTABLE)
88   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
89   for (i=0; i<nbs; i++) {
90     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
91   }
92 #else
93   ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
94   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
95   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
96   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
97 #endif
98   PetscFunctionReturn(0);
99 }
100 
101 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
102   { \
103  \
104     brow = row/bs;  \
105     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
106     rmax = aimax[brow]; nrow = ailen[brow]; \
107     bcol = col/bs; \
108     ridx = row % bs; cidx = col % bs; \
109     low  = 0; high = nrow; \
110     while (high-low > 3) { \
111       t = (low+high)/2; \
112       if (rp[t] > bcol) high = t; \
113       else              low  = t; \
114     } \
115     for (_i=low; _i<high; _i++) { \
116       if (rp[_i] > bcol) break; \
117       if (rp[_i] == bcol) { \
118         bap = ap +  bs2*_i + bs*cidx + ridx; \
119         if (addv == ADD_VALUES) *bap += value;  \
120         else                    *bap  = value;  \
121         goto a_noinsert; \
122       } \
123     } \
124     if (a->nonew == 1) goto a_noinsert; \
125     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
126     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
127     N = nrow++ - 1;  \
128     /* shift up all the later entries in this row */ \
129     for (ii=N; ii>=_i; ii--) { \
130       rp[ii+1] = rp[ii]; \
131       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
132     } \
133     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
134     rp[_i]                      = bcol;  \
135     ap[bs2*_i + bs*cidx + ridx] = value;  \
136 a_noinsert:; \
137     ailen[brow] = nrow; \
138   }
139 
140 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
141   { \
142     brow = row/bs;  \
143     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
144     rmax = bimax[brow]; nrow = bilen[brow]; \
145     bcol = col/bs; \
146     ridx = row % bs; cidx = col % bs; \
147     low  = 0; high = nrow; \
148     while (high-low > 3) { \
149       t = (low+high)/2; \
150       if (rp[t] > bcol) high = t; \
151       else              low  = t; \
152     } \
153     for (_i=low; _i<high; _i++) { \
154       if (rp[_i] > bcol) break; \
155       if (rp[_i] == bcol) { \
156         bap = ap +  bs2*_i + bs*cidx + ridx; \
157         if (addv == ADD_VALUES) *bap += value;  \
158         else                    *bap  = value;  \
159         goto b_noinsert; \
160       } \
161     } \
162     if (b->nonew == 1) goto b_noinsert; \
163     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
164     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
165     N = nrow++ - 1;  \
166     /* shift up all the later entries in this row */ \
167     for (ii=N; ii>=_i; ii--) { \
168       rp[ii+1] = rp[ii]; \
169       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
170     } \
171     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
172     rp[_i]                      = bcol;  \
173     ap[bs2*_i + bs*cidx + ridx] = value;  \
174 b_noinsert:; \
175     bilen[brow] = nrow; \
176   }
177 
178 #undef __FUNCT__
179 #define __FUNCT__ "MatSetValues_MPIBAIJ"
180 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
181 {
182   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
183   MatScalar      value;
184   PetscBool      roworiented = baij->roworiented;
185   PetscErrorCode ierr;
186   PetscInt       i,j,row,col;
187   PetscInt       rstart_orig=mat->rmap->rstart;
188   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
189   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
190 
191   /* Some Variables required in the macro */
192   Mat         A     = baij->A;
193   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
194   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
195   MatScalar   *aa   =a->a;
196 
197   Mat         B     = baij->B;
198   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
199   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
200   MatScalar   *ba   =b->a;
201 
202   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
203   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
204   MatScalar *ap,*bap;
205 
206   PetscFunctionBegin;
207   for (i=0; i<m; i++) {
208     if (im[i] < 0) continue;
209 #if defined(PETSC_USE_DEBUG)
210     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
211 #endif
212     if (im[i] >= rstart_orig && im[i] < rend_orig) {
213       row = im[i] - rstart_orig;
214       for (j=0; j<n; j++) {
215         if (in[j] >= cstart_orig && in[j] < cend_orig) {
216           col = in[j] - cstart_orig;
217           if (roworiented) value = v[i*n+j];
218           else             value = v[i+j*m];
219           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
220           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
221         } else if (in[j] < 0) continue;
222 #if defined(PETSC_USE_DEBUG)
223         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
224 #endif
225         else {
226           if (mat->was_assembled) {
227             if (!baij->colmap) {
228               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
229             }
230 #if defined(PETSC_USE_CTABLE)
231             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
232             col  = col - 1;
233 #else
234             col = baij->colmap[in[j]/bs] - 1;
235 #endif
236             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
237               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
238               col  =  in[j];
239               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
240               B    = baij->B;
241               b    = (Mat_SeqBAIJ*)(B)->data;
242               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
243               ba   =b->a;
244             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
245             else col += in[j]%bs;
246           } else col = in[j];
247           if (roworiented) value = v[i*n+j];
248           else             value = v[i+j*m];
249           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
250           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
251         }
252       }
253     } else {
254       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
255       if (!baij->donotstash) {
256         mat->assembled = PETSC_FALSE;
257         if (roworiented) {
258           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
259         } else {
260           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
261         }
262       }
263     }
264   }
265   PetscFunctionReturn(0);
266 }
267 
268 #undef __FUNCT__
269 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
270 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
271 {
272   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
273   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
274   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
275   PetscErrorCode    ierr;
276   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
277   PetscBool         roworiented=a->roworiented;
278   const PetscScalar *value     = v;
279   MatScalar         *ap,*aa = a->a,*bap;
280 
281   PetscFunctionBegin;
282   if (roworiented) {
283     stepval = (n-1)*bs;
284   } else {
285     stepval = (m-1)*bs;
286   }
287   for (k=0; k<m; k++) { /* loop over added rows */
288     row = im[k];
289     rp   = aj + ai[row];
290     ap   = aa + bs2*ai[row];
291     rmax = imax[row];
292     nrow = ailen[row];
293     low  = 0;
294     high = nrow;
295     for (l=0; l<n; l++) { /* loop over added columns */
296       col = in[l];
297       if (roworiented) {
298         value = v + (k*(stepval+bs) + l)*bs;
299       } else {
300         value = v + (l*(stepval+bs) + k)*bs;
301       }
302       if (col <= lastcol) low = 0;
303       else high = nrow;
304       lastcol = col;
305       while (high-low > 7) {
306         t = (low+high)/2;
307         if (rp[t] > col) high = t;
308         else             low  = t;
309       }
310       for (i=low; i<high; i++) {
311         if (rp[i] > col) break;
312         if (rp[i] == col) {
313           bap = ap +  bs2*i;
314           if (roworiented) {
315             if (is == ADD_VALUES) {
316               for (ii=0; ii<bs; ii++,value+=stepval) {
317                 for (jj=ii; jj<bs2; jj+=bs) {
318                   bap[jj] += *value++;
319                 }
320               }
321             } else {
322               for (ii=0; ii<bs; ii++,value+=stepval) {
323                 for (jj=ii; jj<bs2; jj+=bs) {
324                   bap[jj] = *value++;
325                 }
326               }
327             }
328           } else {
329             if (is == ADD_VALUES) {
330               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
331                 for (jj=0; jj<bs; jj++) {
332                   bap[jj] += value[jj];
333                 }
334                 bap += bs;
335               }
336             } else {
337               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
338                 for (jj=0; jj<bs; jj++) {
339                   bap[jj]  = value[jj];
340                 }
341                 bap += bs;
342               }
343             }
344           }
345           goto noinsert2;
346         }
347       }
348       if (nonew == 1) goto noinsert2;
349       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
350       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
351       N = nrow++ - 1; high++;
352       /* shift up all the later entries in this row */
353       for (ii=N; ii>=i; ii--) {
354         rp[ii+1] = rp[ii];
355         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
356       }
357       if (N >= i) {
358         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
359       }
360       rp[i] = col;
361       bap   = ap +  bs2*i;
362       if (roworiented) {
363         for (ii=0; ii<bs; ii++,value+=stepval) {
364           for (jj=ii; jj<bs2; jj+=bs) {
365             bap[jj] = *value++;
366           }
367         }
368       } else {
369         for (ii=0; ii<bs; ii++,value+=stepval) {
370           for (jj=0; jj<bs; jj++) {
371             *bap++ = *value++;
372           }
373         }
374       }
375 noinsert2:;
376       low = i;
377     }
378     ailen[row] = nrow;
379   }
380   PetscFunctionReturn(0);
381 }
382 
383 #undef __FUNCT__
384 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
385 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
386 {
387   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
388   const PetscScalar *value;
389   MatScalar         *barray     = baij->barray;
390   PetscBool         roworiented = baij->roworiented;
391   PetscErrorCode    ierr;
392   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
393   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
394   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
395 
396   PetscFunctionBegin;
397   if (!barray) {
398     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
399     baij->barray = barray;
400   }
401 
402   if (roworiented) stepval = (n-1)*bs;
403   else stepval = (m-1)*bs;
404 
405   for (i=0; i<m; i++) {
406     if (im[i] < 0) continue;
407 #if defined(PETSC_USE_DEBUG)
408     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
409 #endif
410     if (im[i] >= rstart && im[i] < rend) {
411       row = im[i] - rstart;
412       for (j=0; j<n; j++) {
413         /* If NumCol = 1 then a copy is not required */
414         if ((roworiented) && (n == 1)) {
415           barray = (MatScalar*)v + i*bs2;
416         } else if ((!roworiented) && (m == 1)) {
417           barray = (MatScalar*)v + j*bs2;
418         } else { /* Here a copy is required */
419           if (roworiented) {
420             value = v + (i*(stepval+bs) + j)*bs;
421           } else {
422             value = v + (j*(stepval+bs) + i)*bs;
423           }
424           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
425             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
426             barray += bs;
427           }
428           barray -= bs2;
429         }
430 
431         if (in[j] >= cstart && in[j] < cend) {
432           col  = in[j] - cstart;
433           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,1,&row,1,&col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
434         } else if (in[j] < 0) continue;
435 #if defined(PETSC_USE_DEBUG)
436         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexec column too large %D max %D",in[j],baij->Nbs-1);
437 #endif
438         else {
439           if (mat->was_assembled) {
440             if (!baij->colmap) {
441               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
442             }
443 
444 #if defined(PETSC_USE_DEBUG)
445 #if defined(PETSC_USE_CTABLE)
446             { PetscInt data;
447               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
448               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
449             }
450 #else
451             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
452 #endif
453 #endif
454 #if defined(PETSC_USE_CTABLE)
455             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
456             col  = (col - 1)/bs;
457 #else
458             col = (baij->colmap[in[j]] - 1)/bs;
459 #endif
460             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
461               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
462               col  =  in[j];
463             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
464           } else col = in[j];
465           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,1,&row,1,&col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
466         }
467       }
468     } else {
469       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
470       if (!baij->donotstash) {
471         if (roworiented) {
472           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
473         } else {
474           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
475         }
476       }
477     }
478   }
479   PetscFunctionReturn(0);
480 }
481 
482 #define HASH_KEY 0.6180339887
483 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
484 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
485 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
486 #undef __FUNCT__
487 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
488 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
489 {
490   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
491   PetscBool      roworiented = baij->roworiented;
492   PetscErrorCode ierr;
493   PetscInt       i,j,row,col;
494   PetscInt       rstart_orig=mat->rmap->rstart;
495   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
496   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
497   PetscReal      tmp;
498   MatScalar      **HD = baij->hd,value;
499 #if defined(PETSC_USE_DEBUG)
500   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
501 #endif
502 
503   PetscFunctionBegin;
504   for (i=0; i<m; i++) {
505 #if defined(PETSC_USE_DEBUG)
506     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
507     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
508 #endif
509     row = im[i];
510     if (row >= rstart_orig && row < rend_orig) {
511       for (j=0; j<n; j++) {
512         col = in[j];
513         if (roworiented) value = v[i*n+j];
514         else             value = v[i+j*m];
515         /* Look up PetscInto the Hash Table */
516         key = (row/bs)*Nbs+(col/bs)+1;
517         h1  = HASH(size,key,tmp);
518 
519 
520         idx = h1;
521 #if defined(PETSC_USE_DEBUG)
522         insert_ct++;
523         total_ct++;
524         if (HT[idx] != key) {
525           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
526           if (idx == size) {
527             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
528             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
529           }
530         }
531 #else
532         if (HT[idx] != key) {
533           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
534           if (idx == size) {
535             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
536             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
537           }
538         }
539 #endif
540         /* A HASH table entry is found, so insert the values at the correct address */
541         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
542         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
543       }
544     } else if (!baij->donotstash) {
545       if (roworiented) {
546         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
547       } else {
548         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
549       }
550     }
551   }
552 #if defined(PETSC_USE_DEBUG)
553   baij->ht_total_ct  = total_ct;
554   baij->ht_insert_ct = insert_ct;
555 #endif
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
561 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
562 {
563   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
564   PetscBool         roworiented = baij->roworiented;
565   PetscErrorCode    ierr;
566   PetscInt          i,j,ii,jj,row,col;
567   PetscInt          rstart=baij->rstartbs;
568   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
569   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
570   PetscReal         tmp;
571   MatScalar         **HD = baij->hd,*baij_a;
572   const PetscScalar *v_t,*value;
573 #if defined(PETSC_USE_DEBUG)
574   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
575 #endif
576 
577   PetscFunctionBegin;
578   if (roworiented) stepval = (n-1)*bs;
579   else stepval = (m-1)*bs;
580 
581   for (i=0; i<m; i++) {
582 #if defined(PETSC_USE_DEBUG)
583     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
584     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
585 #endif
586     row = im[i];
587     v_t = v + i*nbs2;
588     if (row >= rstart && row < rend) {
589       for (j=0; j<n; j++) {
590         col = in[j];
591 
592         /* Look up into the Hash Table */
593         key = row*Nbs+col+1;
594         h1  = HASH(size,key,tmp);
595 
596         idx = h1;
597 #if defined(PETSC_USE_DEBUG)
598         total_ct++;
599         insert_ct++;
600         if (HT[idx] != key) {
601           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
602           if (idx == size) {
603             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
604             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
605           }
606         }
607 #else
608         if (HT[idx] != key) {
609           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
610           if (idx == size) {
611             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
612             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
613           }
614         }
615 #endif
616         baij_a = HD[idx];
617         if (roworiented) {
618           /*value = v + i*(stepval+bs)*bs + j*bs;*/
619           /* value = v + (i*(stepval+bs)+j)*bs; */
620           value = v_t;
621           v_t  += bs;
622           if (addv == ADD_VALUES) {
623             for (ii=0; ii<bs; ii++,value+=stepval) {
624               for (jj=ii; jj<bs2; jj+=bs) {
625                 baij_a[jj] += *value++;
626               }
627             }
628           } else {
629             for (ii=0; ii<bs; ii++,value+=stepval) {
630               for (jj=ii; jj<bs2; jj+=bs) {
631                 baij_a[jj] = *value++;
632               }
633             }
634           }
635         } else {
636           value = v + j*(stepval+bs)*bs + i*bs;
637           if (addv == ADD_VALUES) {
638             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
639               for (jj=0; jj<bs; jj++) {
640                 baij_a[jj] += *value++;
641               }
642             }
643           } else {
644             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
645               for (jj=0; jj<bs; jj++) {
646                 baij_a[jj] = *value++;
647               }
648             }
649           }
650         }
651       }
652     } else {
653       if (!baij->donotstash) {
654         if (roworiented) {
655           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
656         } else {
657           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
658         }
659       }
660     }
661   }
662 #if defined(PETSC_USE_DEBUG)
663   baij->ht_total_ct  = total_ct;
664   baij->ht_insert_ct = insert_ct;
665 #endif
666   PetscFunctionReturn(0);
667 }
668 
669 #undef __FUNCT__
670 #define __FUNCT__ "MatGetValues_MPIBAIJ"
671 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
672 {
673   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
674   PetscErrorCode ierr;
675   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
676   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
677 
678   PetscFunctionBegin;
679   for (i=0; i<m; i++) {
680     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
681     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
682     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
683       row = idxm[i] - bsrstart;
684       for (j=0; j<n; j++) {
685         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
686         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
687         if (idxn[j] >= bscstart && idxn[j] < bscend) {
688           col  = idxn[j] - bscstart;
689           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
690         } else {
691           if (!baij->colmap) {
692             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
693           }
694 #if defined(PETSC_USE_CTABLE)
695           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
696           data--;
697 #else
698           data = baij->colmap[idxn[j]/bs]-1;
699 #endif
700           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
701           else {
702             col  = data + idxn[j]%bs;
703             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
704           }
705         }
706       }
707     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
708   }
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatNorm_MPIBAIJ"
714 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
715 {
716   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
717   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
718   PetscErrorCode ierr;
719   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
720   PetscReal      sum = 0.0;
721   MatScalar      *v;
722 
723   PetscFunctionBegin;
724   if (baij->size == 1) {
725     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
726   } else {
727     if (type == NORM_FROBENIUS) {
728       v  = amat->a;
729       nz = amat->nz*bs2;
730       for (i=0; i<nz; i++) {
731         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
732       }
733       v  = bmat->a;
734       nz = bmat->nz*bs2;
735       for (i=0; i<nz; i++) {
736         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
737       }
738       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
739       *nrm = PetscSqrtReal(*nrm);
740     } else if (type == NORM_1) { /* max column sum */
741       PetscReal *tmp,*tmp2;
742       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
743       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
744       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
745       v    = amat->a; jj = amat->j;
746       for (i=0; i<amat->nz; i++) {
747         for (j=0; j<bs; j++) {
748           col = bs*(cstart + *jj) + j; /* column index */
749           for (row=0; row<bs; row++) {
750             tmp[col] += PetscAbsScalar(*v);  v++;
751           }
752         }
753         jj++;
754       }
755       v = bmat->a; jj = bmat->j;
756       for (i=0; i<bmat->nz; i++) {
757         for (j=0; j<bs; j++) {
758           col = bs*garray[*jj] + j;
759           for (row=0; row<bs; row++) {
760             tmp[col] += PetscAbsScalar(*v); v++;
761           }
762         }
763         jj++;
764       }
765       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
766       *nrm = 0.0;
767       for (j=0; j<mat->cmap->N; j++) {
768         if (tmp2[j] > *nrm) *nrm = tmp2[j];
769       }
770       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
771     } else if (type == NORM_INFINITY) { /* max row sum */
772       PetscReal *sums;
773       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
774       sum  = 0.0;
775       for (j=0; j<amat->mbs; j++) {
776         for (row=0; row<bs; row++) sums[row] = 0.0;
777         v  = amat->a + bs2*amat->i[j];
778         nz = amat->i[j+1]-amat->i[j];
779         for (i=0; i<nz; i++) {
780           for (col=0; col<bs; col++) {
781             for (row=0; row<bs; row++) {
782               sums[row] += PetscAbsScalar(*v); v++;
783             }
784           }
785         }
786         v  = bmat->a + bs2*bmat->i[j];
787         nz = bmat->i[j+1]-bmat->i[j];
788         for (i=0; i<nz; i++) {
789           for (col=0; col<bs; col++) {
790             for (row=0; row<bs; row++) {
791               sums[row] += PetscAbsScalar(*v); v++;
792             }
793           }
794         }
795         for (row=0; row<bs; row++) {
796           if (sums[row] > sum) sum = sums[row];
797         }
798       }
799       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
800       ierr = PetscFree(sums);CHKERRQ(ierr);
801     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
802   }
803   PetscFunctionReturn(0);
804 }
805 
806 /*
807   Creates the hash table, and sets the table
808   This table is created only once.
809   If new entried need to be added to the matrix
810   then the hash table has to be destroyed and
811   recreated.
812 */
813 #undef __FUNCT__
814 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
815 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
816 {
817   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
818   Mat            A     = baij->A,B=baij->B;
819   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
820   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
821   PetscErrorCode ierr;
822   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
823   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
824   PetscInt       *HT,key;
825   MatScalar      **HD;
826   PetscReal      tmp;
827 #if defined(PETSC_USE_INFO)
828   PetscInt ct=0,max=0;
829 #endif
830 
831   PetscFunctionBegin;
832   if (baij->ht) PetscFunctionReturn(0);
833 
834   baij->ht_size = (PetscInt)(factor*nz);
835   ht_size       = baij->ht_size;
836 
837   /* Allocate Memory for Hash Table */
838   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
839   HD   = baij->hd;
840   HT   = baij->ht;
841 
842   /* Loop Over A */
843   for (i=0; i<a->mbs; i++) {
844     for (j=ai[i]; j<ai[i+1]; j++) {
845       row = i+rstart;
846       col = aj[j]+cstart;
847 
848       key = row*Nbs + col + 1;
849       h1  = HASH(ht_size,key,tmp);
850       for (k=0; k<ht_size; k++) {
851         if (!HT[(h1+k)%ht_size]) {
852           HT[(h1+k)%ht_size] = key;
853           HD[(h1+k)%ht_size] = a->a + j*bs2;
854           break;
855 #if defined(PETSC_USE_INFO)
856         } else {
857           ct++;
858 #endif
859         }
860       }
861 #if defined(PETSC_USE_INFO)
862       if (k> max) max = k;
863 #endif
864     }
865   }
866   /* Loop Over B */
867   for (i=0; i<b->mbs; i++) {
868     for (j=bi[i]; j<bi[i+1]; j++) {
869       row = i+rstart;
870       col = garray[bj[j]];
871       key = row*Nbs + col + 1;
872       h1  = HASH(ht_size,key,tmp);
873       for (k=0; k<ht_size; k++) {
874         if (!HT[(h1+k)%ht_size]) {
875           HT[(h1+k)%ht_size] = key;
876           HD[(h1+k)%ht_size] = b->a + j*bs2;
877           break;
878 #if defined(PETSC_USE_INFO)
879         } else {
880           ct++;
881 #endif
882         }
883       }
884 #if defined(PETSC_USE_INFO)
885       if (k> max) max = k;
886 #endif
887     }
888   }
889 
890   /* Print Summary */
891 #if defined(PETSC_USE_INFO)
892   for (i=0,j=0; i<ht_size; i++) {
893     if (HT[i]) j++;
894   }
895   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
896 #endif
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
902 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
903 {
904   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
905   PetscErrorCode ierr;
906   PetscInt       nstash,reallocs;
907   InsertMode     addv;
908 
909   PetscFunctionBegin;
910   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
911 
912   /* make sure all processors are either in INSERTMODE or ADDMODE */
913   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
914   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
915   mat->insertmode = addv; /* in case this processor had no cache */
916 
917   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
918   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
919   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
920   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
921   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
922   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
923   PetscFunctionReturn(0);
924 }
925 
926 #undef __FUNCT__
927 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
928 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
929 {
930   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
931   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
932   PetscErrorCode ierr;
933   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
934   PetscInt       *row,*col;
935   PetscBool      r1,r2,r3,other_disassembled;
936   MatScalar      *val;
937   InsertMode     addv = mat->insertmode;
938   PetscMPIInt    n;
939 
940   PetscFunctionBegin;
941   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
942   if (!baij->donotstash && !mat->nooffprocentries) {
943     while (1) {
944       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
945       if (!flg) break;
946 
947       for (i=0; i<n;) {
948         /* Now identify the consecutive vals belonging to the same row */
949         for (j=i,rstart=row[j]; j<n; j++) {
950           if (row[j] != rstart) break;
951         }
952         if (j < n) ncols = j-i;
953         else       ncols = n-i;
954         /* Now assemble all these values with a single function call */
955         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
956         i    = j;
957       }
958     }
959     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
960     /* Now process the block-stash. Since the values are stashed column-oriented,
961        set the roworiented flag to column oriented, and after MatSetValues()
962        restore the original flags */
963     r1 = baij->roworiented;
964     r2 = a->roworiented;
965     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
966 
967     baij->roworiented = PETSC_FALSE;
968     a->roworiented    = PETSC_FALSE;
969 
970     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
971     while (1) {
972       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
973       if (!flg) break;
974 
975       for (i=0; i<n;) {
976         /* Now identify the consecutive vals belonging to the same row */
977         for (j=i,rstart=row[j]; j<n; j++) {
978           if (row[j] != rstart) break;
979         }
980         if (j < n) ncols = j-i;
981         else       ncols = n-i;
982         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
983         i    = j;
984       }
985     }
986     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
987 
988     baij->roworiented = r1;
989     a->roworiented    = r2;
990 
991     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
992   }
993 
994   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
995   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
996 
997   /* determine if any processor has disassembled, if so we must
998      also disassemble ourselfs, in order that we may reassemble. */
999   /*
1000      if nonzero structure of submatrix B cannot change then we know that
1001      no processor disassembled thus we can skip this stuff
1002   */
1003   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
1004     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1005     if (mat->was_assembled && !other_disassembled) {
1006       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1007     }
1008   }
1009 
1010   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1011     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1012   }
1013   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1014   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1015 
1016 #if defined(PETSC_USE_INFO)
1017   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1018     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
1019 
1020     baij->ht_total_ct  = 0;
1021     baij->ht_insert_ct = 0;
1022   }
1023 #endif
1024   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1025     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1026 
1027     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1028     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1029   }
1030 
1031   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1032 
1033   baij->rowvalues = 0;
1034 
1035   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
1036   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
1037     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
1038     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1039   }
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
1044 #include <petscdraw.h>
1045 #undef __FUNCT__
1046 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1047 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1048 {
1049   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1050   PetscErrorCode    ierr;
1051   PetscMPIInt       rank = baij->rank;
1052   PetscInt          bs   = mat->rmap->bs;
1053   PetscBool         iascii,isdraw;
1054   PetscViewer       sviewer;
1055   PetscViewerFormat format;
1056 
1057   PetscFunctionBegin;
1058   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1059   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1060   if (iascii) {
1061     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1062     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1063       MatInfo info;
1064       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1065       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1066       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1067       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1068                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
1069       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1070       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1071       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1072       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1073       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1074       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1075       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1076       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1077       PetscFunctionReturn(0);
1078     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1079       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1080       PetscFunctionReturn(0);
1081     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1082       PetscFunctionReturn(0);
1083     }
1084   }
1085 
1086   if (isdraw) {
1087     PetscDraw draw;
1088     PetscBool isnull;
1089     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1090     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1091   }
1092 
1093   {
1094     /* assemble the entire matrix onto first processor. */
1095     Mat         A;
1096     Mat_SeqBAIJ *Aloc;
1097     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1098     MatScalar   *a;
1099     const char  *matname;
1100 
1101     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1102     /* Perhaps this should be the type of mat? */
1103     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1104     if (!rank) {
1105       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1106     } else {
1107       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1108     }
1109     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1110     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1111     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1112     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1113 
1114     /* copy over the A part */
1115     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1116     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1117     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1118 
1119     for (i=0; i<mbs; i++) {
1120       rvals[0] = bs*(baij->rstartbs + i);
1121       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1122       for (j=ai[i]; j<ai[i+1]; j++) {
1123         col = (baij->cstartbs+aj[j])*bs;
1124         for (k=0; k<bs; k++) {
1125           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1126           col++; a += bs;
1127         }
1128       }
1129     }
1130     /* copy over the B part */
1131     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1132     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1133     for (i=0; i<mbs; i++) {
1134       rvals[0] = bs*(baij->rstartbs + i);
1135       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1136       for (j=ai[i]; j<ai[i+1]; j++) {
1137         col = baij->garray[aj[j]]*bs;
1138         for (k=0; k<bs; k++) {
1139           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1140           col++; a += bs;
1141         }
1142       }
1143     }
1144     ierr = PetscFree(rvals);CHKERRQ(ierr);
1145     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1146     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1147     /*
1148        Everyone has to call to draw the matrix since the graphics waits are
1149        synchronized across all processors that share the PetscDraw object
1150     */
1151     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1152     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1153     if (!rank) {
1154       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1155       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1156     }
1157     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1158     ierr = MatDestroy(&A);CHKERRQ(ierr);
1159   }
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 #undef __FUNCT__
1164 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1165 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1166 {
1167   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1168   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1169   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1170   PetscErrorCode ierr;
1171   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1172   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1173   int            fd;
1174   PetscScalar    *column_values;
1175   FILE           *file;
1176   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1177   PetscInt       message_count,flowcontrolcount;
1178 
1179   PetscFunctionBegin;
1180   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1181   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1182   nz   = bs2*(A->nz + B->nz);
1183   rlen = mat->rmap->n;
1184   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1185   if (!rank) {
1186     header[0] = MAT_FILE_CLASSID;
1187     header[1] = mat->rmap->N;
1188     header[2] = mat->cmap->N;
1189 
1190     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1191     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1192     /* get largest number of rows any processor has */
1193     range = mat->rmap->range;
1194     for (i=1; i<size; i++) {
1195       rlen = PetscMax(rlen,range[i+1] - range[i]);
1196     }
1197   } else {
1198     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1199   }
1200 
1201   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1202   /* compute lengths of each row  */
1203   for (i=0; i<a->mbs; i++) {
1204     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1205   }
1206   /* store the row lengths to the file */
1207   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1208   if (!rank) {
1209     MPI_Status status;
1210     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1211     rlen = (range[1] - range[0])/bs;
1212     for (i=0; i<rlen; i++) {
1213       for (j=0; j<bs; j++) {
1214         row_lens[i*bs+j] = bs*crow_lens[i];
1215       }
1216     }
1217     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1218     for (i=1; i<size; i++) {
1219       rlen = (range[i+1] - range[i])/bs;
1220       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1221       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1222       for (k=0; k<rlen; k++) {
1223         for (j=0; j<bs; j++) {
1224           row_lens[k*bs+j] = bs*crow_lens[k];
1225         }
1226       }
1227       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1228     }
1229     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1230     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1231   } else {
1232     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1233     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1234     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1235   }
1236   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1237 
1238   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1239      information needed to make it for each row from a block row. This does require more communication but still not more than
1240      the communication needed for the nonzero values  */
1241   nzmax = nz; /*  space a largest processor needs */
1242   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1243   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1244   cnt   = 0;
1245   for (i=0; i<a->mbs; i++) {
1246     pcnt = cnt;
1247     for (j=B->i[i]; j<B->i[i+1]; j++) {
1248       if ((col = garray[B->j[j]]) > cstart) break;
1249       for (l=0; l<bs; l++) {
1250         column_indices[cnt++] = bs*col+l;
1251       }
1252     }
1253     for (k=A->i[i]; k<A->i[i+1]; k++) {
1254       for (l=0; l<bs; l++) {
1255         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1256       }
1257     }
1258     for (; j<B->i[i+1]; j++) {
1259       for (l=0; l<bs; l++) {
1260         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1261       }
1262     }
1263     len = cnt - pcnt;
1264     for (k=1; k<bs; k++) {
1265       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1266       cnt += len;
1267     }
1268   }
1269   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1270 
1271   /* store the columns to the file */
1272   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1273   if (!rank) {
1274     MPI_Status status;
1275     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1276     for (i=1; i<size; i++) {
1277       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1278       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1279       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1280       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1281     }
1282     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1283   } else {
1284     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1285     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1286     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1287     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1288   }
1289   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1290 
1291   /* load up the numerical values */
1292   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1293   cnt  = 0;
1294   for (i=0; i<a->mbs; i++) {
1295     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1296     for (j=B->i[i]; j<B->i[i+1]; j++) {
1297       if (garray[B->j[j]] > cstart) break;
1298       for (l=0; l<bs; l++) {
1299         for (ll=0; ll<bs; ll++) {
1300           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1301         }
1302       }
1303       cnt += bs;
1304     }
1305     for (k=A->i[i]; k<A->i[i+1]; k++) {
1306       for (l=0; l<bs; l++) {
1307         for (ll=0; ll<bs; ll++) {
1308           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1309         }
1310       }
1311       cnt += bs;
1312     }
1313     for (; j<B->i[i+1]; j++) {
1314       for (l=0; l<bs; l++) {
1315         for (ll=0; ll<bs; ll++) {
1316           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1317         }
1318       }
1319       cnt += bs;
1320     }
1321     cnt += (bs-1)*rlen;
1322   }
1323   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1324 
1325   /* store the column values to the file */
1326   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1327   if (!rank) {
1328     MPI_Status status;
1329     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1330     for (i=1; i<size; i++) {
1331       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1332       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1333       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1334       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1335     }
1336     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1337   } else {
1338     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1339     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1340     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1342   }
1343   ierr = PetscFree(column_values);CHKERRQ(ierr);
1344 
1345   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1346   if (file) {
1347     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1348   }
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatView_MPIBAIJ"
1354 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1355 {
1356   PetscErrorCode ierr;
1357   PetscBool      iascii,isdraw,issocket,isbinary;
1358 
1359   PetscFunctionBegin;
1360   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1361   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1362   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1363   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1364   if (iascii || isdraw || issocket) {
1365     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1366   } else if (isbinary) {
1367     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1368   }
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 #undef __FUNCT__
1373 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1374 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1375 {
1376   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1377   PetscErrorCode ierr;
1378 
1379   PetscFunctionBegin;
1380 #if defined(PETSC_USE_LOG)
1381   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1382 #endif
1383   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1384   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1385   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1386   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1387 #if defined(PETSC_USE_CTABLE)
1388   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1389 #else
1390   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1391 #endif
1392   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1393   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1394   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1395   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1396   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1397   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1398   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1399   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1400 
1401   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1402   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1403   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1404   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1405   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1406   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1407   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1408   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1409   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1410   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatMult_MPIBAIJ"
1416 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1417 {
1418   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1419   PetscErrorCode ierr;
1420   PetscInt       nt;
1421 
1422   PetscFunctionBegin;
1423   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1424   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1425   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1426   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1427   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1428   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1429   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1430   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1436 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1437 {
1438   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1439   PetscErrorCode ierr;
1440 
1441   PetscFunctionBegin;
1442   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1443   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1444   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1445   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1446   PetscFunctionReturn(0);
1447 }
1448 
1449 #undef __FUNCT__
1450 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1451 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1452 {
1453   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1454   PetscErrorCode ierr;
1455   PetscBool      merged;
1456 
1457   PetscFunctionBegin;
1458   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1459   /* do nondiagonal part */
1460   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1461   if (!merged) {
1462     /* send it on its way */
1463     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1464     /* do local part */
1465     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1466     /* receive remote parts: note this assumes the values are not actually */
1467     /* inserted in yy until the next line */
1468     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1469   } else {
1470     /* do local part */
1471     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1472     /* send it on its way */
1473     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1474     /* values actually were received in the Begin() but we need to call this nop */
1475     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1476   }
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1482 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1483 {
1484   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1485   PetscErrorCode ierr;
1486 
1487   PetscFunctionBegin;
1488   /* do nondiagonal part */
1489   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1490   /* send it on its way */
1491   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1492   /* do local part */
1493   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1494   /* receive remote parts: note this assumes the values are not actually */
1495   /* inserted in yy until the next line, which is true for my implementation*/
1496   /* but is not perhaps always true. */
1497   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 /*
1502   This only works correctly for square matrices where the subblock A->A is the
1503    diagonal block
1504 */
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1507 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1508 {
1509   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1510   PetscErrorCode ierr;
1511 
1512   PetscFunctionBegin;
1513   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1514   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatScale_MPIBAIJ"
1520 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1521 {
1522   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1523   PetscErrorCode ierr;
1524 
1525   PetscFunctionBegin;
1526   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1527   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1533 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1534 {
1535   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1536   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1537   PetscErrorCode ierr;
1538   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1539   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1540   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1541 
1542   PetscFunctionBegin;
1543   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1544   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1545   mat->getrowactive = PETSC_TRUE;
1546 
1547   if (!mat->rowvalues && (idx || v)) {
1548     /*
1549         allocate enough space to hold information from the longest row.
1550     */
1551     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1552     PetscInt    max = 1,mbs = mat->mbs,tmp;
1553     for (i=0; i<mbs; i++) {
1554       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1555       if (max < tmp) max = tmp;
1556     }
1557     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1558   }
1559   lrow = row - brstart;
1560 
1561   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1562   if (!v)   {pvA = 0; pvB = 0;}
1563   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1564   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1565   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1566   nztot = nzA + nzB;
1567 
1568   cmap = mat->garray;
1569   if (v  || idx) {
1570     if (nztot) {
1571       /* Sort by increasing column numbers, assuming A and B already sorted */
1572       PetscInt imark = -1;
1573       if (v) {
1574         *v = v_p = mat->rowvalues;
1575         for (i=0; i<nzB; i++) {
1576           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1577           else break;
1578         }
1579         imark = i;
1580         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1581         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1582       }
1583       if (idx) {
1584         *idx = idx_p = mat->rowindices;
1585         if (imark > -1) {
1586           for (i=0; i<imark; i++) {
1587             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1588           }
1589         } else {
1590           for (i=0; i<nzB; i++) {
1591             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1592             else break;
1593           }
1594           imark = i;
1595         }
1596         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1597         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1598       }
1599     } else {
1600       if (idx) *idx = 0;
1601       if (v)   *v   = 0;
1602     }
1603   }
1604   *nz  = nztot;
1605   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1606   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 #undef __FUNCT__
1611 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1612 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1613 {
1614   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1615 
1616   PetscFunctionBegin;
1617   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1618   baij->getrowactive = PETSC_FALSE;
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 #undef __FUNCT__
1623 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1624 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1625 {
1626   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1627   PetscErrorCode ierr;
1628 
1629   PetscFunctionBegin;
1630   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1631   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1632   PetscFunctionReturn(0);
1633 }
1634 
1635 #undef __FUNCT__
1636 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1637 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1638 {
1639   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1640   Mat            A  = a->A,B = a->B;
1641   PetscErrorCode ierr;
1642   PetscReal      isend[5],irecv[5];
1643 
1644   PetscFunctionBegin;
1645   info->block_size = (PetscReal)matin->rmap->bs;
1646 
1647   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1648 
1649   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1650   isend[3] = info->memory;  isend[4] = info->mallocs;
1651 
1652   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1653 
1654   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1655   isend[3] += info->memory;  isend[4] += info->mallocs;
1656 
1657   if (flag == MAT_LOCAL) {
1658     info->nz_used      = isend[0];
1659     info->nz_allocated = isend[1];
1660     info->nz_unneeded  = isend[2];
1661     info->memory       = isend[3];
1662     info->mallocs      = isend[4];
1663   } else if (flag == MAT_GLOBAL_MAX) {
1664     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1665 
1666     info->nz_used      = irecv[0];
1667     info->nz_allocated = irecv[1];
1668     info->nz_unneeded  = irecv[2];
1669     info->memory       = irecv[3];
1670     info->mallocs      = irecv[4];
1671   } else if (flag == MAT_GLOBAL_SUM) {
1672     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1673 
1674     info->nz_used      = irecv[0];
1675     info->nz_allocated = irecv[1];
1676     info->nz_unneeded  = irecv[2];
1677     info->memory       = irecv[3];
1678     info->mallocs      = irecv[4];
1679   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1680   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1681   info->fill_ratio_needed = 0;
1682   info->factor_mallocs    = 0;
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1688 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1689 {
1690   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1691   PetscErrorCode ierr;
1692 
1693   PetscFunctionBegin;
1694   switch (op) {
1695   case MAT_NEW_NONZERO_LOCATIONS:
1696   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1697   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1698   case MAT_KEEP_NONZERO_PATTERN:
1699   case MAT_NEW_NONZERO_LOCATION_ERR:
1700     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1701     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1702     break;
1703   case MAT_ROW_ORIENTED:
1704     a->roworiented = flg;
1705 
1706     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1707     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1708     break;
1709   case MAT_NEW_DIAGONALS:
1710     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1711     break;
1712   case MAT_IGNORE_OFF_PROC_ENTRIES:
1713     a->donotstash = flg;
1714     break;
1715   case MAT_USE_HASH_TABLE:
1716     a->ht_flag = flg;
1717     break;
1718   case MAT_SYMMETRIC:
1719   case MAT_STRUCTURALLY_SYMMETRIC:
1720   case MAT_HERMITIAN:
1721   case MAT_SYMMETRY_ETERNAL:
1722     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1723     break;
1724   default:
1725     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1726   }
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1732 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1733 {
1734   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1735   Mat_SeqBAIJ    *Aloc;
1736   Mat            B;
1737   PetscErrorCode ierr;
1738   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1739   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1740   MatScalar      *a;
1741 
1742   PetscFunctionBegin;
1743   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1744   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1745     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1746     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1747     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1748     /* Do not know preallocation information, but must set block size */
1749     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1750   } else {
1751     B = *matout;
1752   }
1753 
1754   /* copy over the A part */
1755   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1756   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1757   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1758 
1759   for (i=0; i<mbs; i++) {
1760     rvals[0] = bs*(baij->rstartbs + i);
1761     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1762     for (j=ai[i]; j<ai[i+1]; j++) {
1763       col = (baij->cstartbs+aj[j])*bs;
1764       for (k=0; k<bs; k++) {
1765         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1766 
1767         col++; a += bs;
1768       }
1769     }
1770   }
1771   /* copy over the B part */
1772   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1773   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1774   for (i=0; i<mbs; i++) {
1775     rvals[0] = bs*(baij->rstartbs + i);
1776     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1777     for (j=ai[i]; j<ai[i+1]; j++) {
1778       col = baij->garray[aj[j]]*bs;
1779       for (k=0; k<bs; k++) {
1780         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1781         col++;
1782         a += bs;
1783       }
1784     }
1785   }
1786   ierr = PetscFree(rvals);CHKERRQ(ierr);
1787   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1788   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1789 
1790   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1791   else {
1792     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1793   }
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 #undef __FUNCT__
1798 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1799 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1800 {
1801   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1802   Mat            a     = baij->A,b = baij->B;
1803   PetscErrorCode ierr;
1804   PetscInt       s1,s2,s3;
1805 
1806   PetscFunctionBegin;
1807   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1808   if (rr) {
1809     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1810     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1811     /* Overlap communication with computation. */
1812     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1813   }
1814   if (ll) {
1815     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1816     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1817     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1818   }
1819   /* scale  the diagonal block */
1820   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1821 
1822   if (rr) {
1823     /* Do a scatter end and then right scale the off-diagonal block */
1824     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1825     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1826   }
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1832 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1833 {
1834   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1835   PetscInt      *owners = A->rmap->range;
1836   PetscInt       n      = A->rmap->n;
1837   PetscSF        sf;
1838   PetscInt      *lrows;
1839   PetscSFNode   *rrows;
1840   PetscInt       r, p = 0, len = 0;
1841   PetscErrorCode ierr;
1842 
1843   PetscFunctionBegin;
1844   /* Create SF where leaves are input rows and roots are owned rows */
1845   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1846   for (r = 0; r < n; ++r) lrows[r] = -1;
1847   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
1848   for (r = 0; r < N; ++r) {
1849     const PetscInt idx   = rows[r];
1850     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1851     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1852       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1853     }
1854     if (A->nooffproczerorows) {
1855       if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank);
1856       lrows[len++] = idx - owners[p];
1857     } else {
1858       rrows[r].rank = p;
1859       rrows[r].index = rows[r] - owners[p];
1860     }
1861   }
1862   if (!A->nooffproczerorows) {
1863     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1864     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1865     /* Collect flags for rows to be zeroed */
1866     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1867     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1868     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1869     /* Compress and put in row numbers */
1870     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1871   }
1872   /* fix right hand side if needed */
1873   if (x && b) {
1874     const PetscScalar *xx;
1875     PetscScalar       *bb;
1876 
1877     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1878     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1879     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1880     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1881     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1882   }
1883 
1884   /* actually zap the local rows */
1885   /*
1886         Zero the required rows. If the "diagonal block" of the matrix
1887      is square and the user wishes to set the diagonal we use separate
1888      code so that MatSetValues() is not called for each diagonal allocating
1889      new memory, thus calling lots of mallocs and slowing things down.
1890 
1891   */
1892   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1893   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1894   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1895     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1896   } else if (diag != 0.0) {
1897     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1898     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1899        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1900     for (r = 0; r < len; ++r) {
1901       const PetscInt row = lrows[r] + A->rmap->rstart;
1902       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1903     }
1904     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1905     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1906   } else {
1907     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1908   }
1909   ierr = PetscFree(lrows);CHKERRQ(ierr);
1910 
1911   /* only change matrix nonzero state if pattern was allowed to be changed */
1912   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1913     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1914     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1915   }
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1921 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1922 {
1923   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1924   PetscErrorCode    ierr;
1925   PetscMPIInt       n = A->rmap->n;
1926   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
1927   PetscInt          *lrows,*owners = A->rmap->range;
1928   PetscSFNode       *rrows;
1929   PetscSF           sf;
1930   const PetscScalar *xx;
1931   PetscScalar       *bb,*mask;
1932   Vec               xmask,lmask;
1933   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1934   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1935   PetscScalar       *aa;
1936 
1937   PetscFunctionBegin;
1938   /* Create SF where leaves are input rows and roots are owned rows */
1939   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1940   for (r = 0; r < n; ++r) lrows[r] = -1;
1941   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1942   for (r = 0; r < N; ++r) {
1943     const PetscInt idx   = rows[r];
1944     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1945     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1946       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1947     }
1948     rrows[r].rank  = p;
1949     rrows[r].index = rows[r] - owners[p];
1950   }
1951   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1952   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1953   /* Collect flags for rows to be zeroed */
1954   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1955   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1956   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1957   /* Compress and put in row numbers */
1958   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1959   /* zero diagonal part of matrix */
1960   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1961   /* handle off diagonal part of matrix */
1962   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1963   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1964   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1965   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1966   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1967   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1968   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1969   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1970   if (x) {
1971     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1972     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1973     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1974     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1975   }
1976   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1977   /* remove zeroed rows of off diagonal matrix */
1978   for (i = 0; i < len; ++i) {
1979     row   = lrows[i];
1980     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1981     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1982     for (k = 0; k < count; ++k) {
1983       aa[0] = 0.0;
1984       aa   += bs;
1985     }
1986   }
1987   /* loop over all elements of off process part of matrix zeroing removed columns*/
1988   for (i = 0; i < l->B->rmap->N; ++i) {
1989     row = i/bs;
1990     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1991       for (k = 0; k < bs; ++k) {
1992         col = bs*baij->j[j] + k;
1993         if (PetscAbsScalar(mask[col])) {
1994           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1995           if (b) bb[i] -= aa[0]*xx[col];
1996           aa[0] = 0.0;
1997         }
1998       }
1999     }
2000   }
2001   if (x) {
2002     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2003     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
2004   }
2005   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
2006   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
2007   ierr = PetscFree(lrows);CHKERRQ(ierr);
2008 
2009   /* only change matrix nonzero state if pattern was allowed to be changed */
2010   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
2011     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
2012     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2013   }
2014   PetscFunctionReturn(0);
2015 }
2016 
2017 #undef __FUNCT__
2018 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
2019 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
2020 {
2021   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2022   PetscErrorCode ierr;
2023 
2024   PetscFunctionBegin;
2025   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
2030 
2031 #undef __FUNCT__
2032 #define __FUNCT__ "MatEqual_MPIBAIJ"
2033 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
2034 {
2035   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
2036   Mat            a,b,c,d;
2037   PetscBool      flg;
2038   PetscErrorCode ierr;
2039 
2040   PetscFunctionBegin;
2041   a = matA->A; b = matA->B;
2042   c = matB->A; d = matB->B;
2043 
2044   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2045   if (flg) {
2046     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2047   }
2048   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatCopy_MPIBAIJ"
2054 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
2055 {
2056   PetscErrorCode ierr;
2057   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2058   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2059 
2060   PetscFunctionBegin;
2061   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2062   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2063     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2064   } else {
2065     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2066     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2067   }
2068   PetscFunctionReturn(0);
2069 }
2070 
2071 #undef __FUNCT__
2072 #define __FUNCT__ "MatSetUp_MPIBAIJ"
2073 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
2074 {
2075   PetscErrorCode ierr;
2076 
2077   PetscFunctionBegin;
2078   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2079   PetscFunctionReturn(0);
2080 }
2081 
2082 #undef __FUNCT__
2083 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ"
2084 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2085 {
2086   PetscErrorCode ierr;
2087   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
2088   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2089   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2090 
2091   PetscFunctionBegin;
2092   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 #undef __FUNCT__
2097 #define __FUNCT__ "MatAXPY_MPIBAIJ"
2098 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2099 {
2100   PetscErrorCode ierr;
2101   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
2102   PetscBLASInt   bnz,one=1;
2103   Mat_SeqBAIJ    *x,*y;
2104 
2105   PetscFunctionBegin;
2106   if (str == SAME_NONZERO_PATTERN) {
2107     PetscScalar alpha = a;
2108     x    = (Mat_SeqBAIJ*)xx->A->data;
2109     y    = (Mat_SeqBAIJ*)yy->A->data;
2110     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2111     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2112     x    = (Mat_SeqBAIJ*)xx->B->data;
2113     y    = (Mat_SeqBAIJ*)yy->B->data;
2114     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2115     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2116     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2117   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2118     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2119   } else {
2120     Mat      B;
2121     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
2122     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2123     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2124     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2125     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2126     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2127     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2128     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
2129     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2130     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2131     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2132     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
2133     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2134     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2135     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2136     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2137   }
2138   PetscFunctionReturn(0);
2139 }
2140 
2141 #undef __FUNCT__
2142 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2143 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2144 {
2145   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2146   PetscErrorCode ierr;
2147 
2148   PetscFunctionBegin;
2149   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2150   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2151   PetscFunctionReturn(0);
2152 }
2153 
2154 #undef __FUNCT__
2155 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2156 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2157 {
2158   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2159   PetscErrorCode ierr;
2160 
2161   PetscFunctionBegin;
2162   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2163   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2169 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2170 {
2171   PetscErrorCode ierr;
2172   IS             iscol_local;
2173   PetscInt       csize;
2174 
2175   PetscFunctionBegin;
2176   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2177   if (call == MAT_REUSE_MATRIX) {
2178     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2179     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2180   } else {
2181     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2182   }
2183   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2184   if (call == MAT_INITIAL_MATRIX) {
2185     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2186     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2187   }
2188   PetscFunctionReturn(0);
2189 }
2190 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2193 /*
2194   Not great since it makes two copies of the submatrix, first an SeqBAIJ
2195   in local and then by concatenating the local matrices the end result.
2196   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2197   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
2198 */
2199 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2200 {
2201   PetscErrorCode ierr;
2202   PetscMPIInt    rank,size;
2203   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2204   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2205   Mat            M,Mreuse;
2206   MatScalar      *vwork,*aa;
2207   MPI_Comm       comm;
2208   IS             isrow_new, iscol_new;
2209   PetscBool      idflag,allrows, allcols;
2210   Mat_SeqBAIJ    *aij;
2211 
2212   PetscFunctionBegin;
2213   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2214   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2215   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2216   /* The compression and expansion should be avoided. Doesn't point
2217      out errors, might change the indices, hence buggey */
2218   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2219   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2220 
2221   /* Check for special case: each processor gets entire matrix columns */
2222   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2223   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2224   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2225   else allcols = PETSC_FALSE;
2226 
2227   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2228   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2229   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2230   else allrows = PETSC_FALSE;
2231 
2232   if (call ==  MAT_REUSE_MATRIX) {
2233     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2234     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2235     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2236   } else {
2237     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2238   }
2239   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2240   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2241   /*
2242       m - number of local rows
2243       n - number of columns (same on all processors)
2244       rstart - first row in new global matrix generated
2245   */
2246   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2247   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2248   m    = m/bs;
2249   n    = n/bs;
2250 
2251   if (call == MAT_INITIAL_MATRIX) {
2252     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2253     ii  = aij->i;
2254     jj  = aij->j;
2255 
2256     /*
2257         Determine the number of non-zeros in the diagonal and off-diagonal
2258         portions of the matrix in order to do correct preallocation
2259     */
2260 
2261     /* first get start and end of "diagonal" columns */
2262     if (csize == PETSC_DECIDE) {
2263       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2264       if (mglobal == n*bs) { /* square matrix */
2265         nlocal = m;
2266       } else {
2267         nlocal = n/size + ((n % size) > rank);
2268       }
2269     } else {
2270       nlocal = csize/bs;
2271     }
2272     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2273     rstart = rend - nlocal;
2274     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2275 
2276     /* next, compute all the lengths */
2277     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2278     for (i=0; i<m; i++) {
2279       jend = ii[i+1] - ii[i];
2280       olen = 0;
2281       dlen = 0;
2282       for (j=0; j<jend; j++) {
2283         if (*jj < rstart || *jj >= rend) olen++;
2284         else dlen++;
2285         jj++;
2286       }
2287       olens[i] = olen;
2288       dlens[i] = dlen;
2289     }
2290     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2291     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2292     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2293     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2294     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2295     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2296   } else {
2297     PetscInt ml,nl;
2298 
2299     M    = *newmat;
2300     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2301     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2302     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2303     /*
2304          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2305        rather than the slower MatSetValues().
2306     */
2307     M->was_assembled = PETSC_TRUE;
2308     M->assembled     = PETSC_FALSE;
2309   }
2310   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2311   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2312   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2313   ii   = aij->i;
2314   jj   = aij->j;
2315   aa   = aij->a;
2316   for (i=0; i<m; i++) {
2317     row   = rstart/bs + i;
2318     nz    = ii[i+1] - ii[i];
2319     cwork = jj;     jj += nz;
2320     vwork = aa;     aa += nz*bs*bs;
2321     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2322   }
2323 
2324   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2325   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2326   *newmat = M;
2327 
2328   /* save submatrix used in processor for next request */
2329   if (call ==  MAT_INITIAL_MATRIX) {
2330     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2331     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2332   }
2333   PetscFunctionReturn(0);
2334 }
2335 
2336 #undef __FUNCT__
2337 #define __FUNCT__ "MatPermute_MPIBAIJ"
2338 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2339 {
2340   MPI_Comm       comm,pcomm;
2341   PetscInt       clocal_size,nrows;
2342   const PetscInt *rows;
2343   PetscMPIInt    size;
2344   IS             crowp,lcolp;
2345   PetscErrorCode ierr;
2346 
2347   PetscFunctionBegin;
2348   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2349   /* make a collective version of 'rowp' */
2350   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2351   if (pcomm==comm) {
2352     crowp = rowp;
2353   } else {
2354     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2355     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2356     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2357     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2358   }
2359   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2360   /* make a local version of 'colp' */
2361   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2362   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2363   if (size==1) {
2364     lcolp = colp;
2365   } else {
2366     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2367   }
2368   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2369   /* now we just get the submatrix */
2370   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2371   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2372   /* clean up */
2373   if (pcomm!=comm) {
2374     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2375   }
2376   if (size>1) {
2377     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2378   }
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 #undef __FUNCT__
2383 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2384 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2385 {
2386   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2387   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2388 
2389   PetscFunctionBegin;
2390   if (nghosts) *nghosts = B->nbs;
2391   if (ghosts) *ghosts = baij->garray;
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 #undef __FUNCT__
2396 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2397 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2398 {
2399   Mat            B;
2400   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2401   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2402   Mat_SeqAIJ     *b;
2403   PetscErrorCode ierr;
2404   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2405   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2406   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2407 
2408   PetscFunctionBegin;
2409   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2410   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2411 
2412   /* ----------------------------------------------------------------
2413      Tell every processor the number of nonzeros per row
2414   */
2415   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2416   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2417     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2418   }
2419   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2420   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2421   displs    = recvcounts + size;
2422   for (i=0; i<size; i++) {
2423     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2424     displs[i]     = A->rmap->range[i]/bs;
2425   }
2426 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2427   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2428 #else
2429   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2430 #endif
2431   /* ---------------------------------------------------------------
2432      Create the sequential matrix of the same type as the local block diagonal
2433   */
2434   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2435   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2436   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2437   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2438   b    = (Mat_SeqAIJ*)B->data;
2439 
2440   /*--------------------------------------------------------------------
2441     Copy my part of matrix column indices over
2442   */
2443   sendcount  = ad->nz + bd->nz;
2444   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2445   a_jsendbuf = ad->j;
2446   b_jsendbuf = bd->j;
2447   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2448   cnt        = 0;
2449   for (i=0; i<n; i++) {
2450 
2451     /* put in lower diagonal portion */
2452     m = bd->i[i+1] - bd->i[i];
2453     while (m > 0) {
2454       /* is it above diagonal (in bd (compressed) numbering) */
2455       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2456       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2457       m--;
2458     }
2459 
2460     /* put in diagonal portion */
2461     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2462       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2463     }
2464 
2465     /* put in upper diagonal portion */
2466     while (m-- > 0) {
2467       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2468     }
2469   }
2470   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2471 
2472   /*--------------------------------------------------------------------
2473     Gather all column indices to all processors
2474   */
2475   for (i=0; i<size; i++) {
2476     recvcounts[i] = 0;
2477     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2478       recvcounts[i] += lens[j];
2479     }
2480   }
2481   displs[0] = 0;
2482   for (i=1; i<size; i++) {
2483     displs[i] = displs[i-1] + recvcounts[i-1];
2484   }
2485 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2486   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2487 #else
2488   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2489 #endif
2490   /*--------------------------------------------------------------------
2491     Assemble the matrix into useable form (note numerical values not yet set)
2492   */
2493   /* set the b->ilen (length of each row) values */
2494   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2495   /* set the b->i indices */
2496   b->i[0] = 0;
2497   for (i=1; i<=A->rmap->N/bs; i++) {
2498     b->i[i] = b->i[i-1] + lens[i-1];
2499   }
2500   ierr = PetscFree(lens);CHKERRQ(ierr);
2501   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2502   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2503   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2504 
2505   if (A->symmetric) {
2506     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2507   } else if (A->hermitian) {
2508     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2509   } else if (A->structurally_symmetric) {
2510     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2511   }
2512   *newmat = B;
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 #undef __FUNCT__
2517 #define __FUNCT__ "MatSOR_MPIBAIJ"
2518 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2519 {
2520   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2521   PetscErrorCode ierr;
2522   Vec            bb1 = 0;
2523 
2524   PetscFunctionBegin;
2525   if (flag == SOR_APPLY_UPPER) {
2526     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2527     PetscFunctionReturn(0);
2528   }
2529 
2530   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2531     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2532   }
2533 
2534   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2535     if (flag & SOR_ZERO_INITIAL_GUESS) {
2536       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2537       its--;
2538     }
2539 
2540     while (its--) {
2541       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2542       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2543 
2544       /* update rhs: bb1 = bb - B*x */
2545       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2546       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2547 
2548       /* local sweep */
2549       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2550     }
2551   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2552     if (flag & SOR_ZERO_INITIAL_GUESS) {
2553       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2554       its--;
2555     }
2556     while (its--) {
2557       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2558       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2559 
2560       /* update rhs: bb1 = bb - B*x */
2561       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2562       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2563 
2564       /* local sweep */
2565       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2566     }
2567   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2568     if (flag & SOR_ZERO_INITIAL_GUESS) {
2569       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2570       its--;
2571     }
2572     while (its--) {
2573       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2574       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2575 
2576       /* update rhs: bb1 = bb - B*x */
2577       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2578       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2579 
2580       /* local sweep */
2581       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2582     }
2583   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2584 
2585   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 #undef __FUNCT__
2590 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2591 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2592 {
2593   PetscErrorCode ierr;
2594   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2595   PetscInt       N,i,*garray = aij->garray;
2596   PetscInt       ib,jb,bs = A->rmap->bs;
2597   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2598   MatScalar      *a_val = a_aij->a;
2599   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2600   MatScalar      *b_val = b_aij->a;
2601   PetscReal      *work;
2602 
2603   PetscFunctionBegin;
2604   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2605   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2606   if (type == NORM_2) {
2607     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2608       for (jb=0; jb<bs; jb++) {
2609         for (ib=0; ib<bs; ib++) {
2610           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2611           a_val++;
2612         }
2613       }
2614     }
2615     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2616       for (jb=0; jb<bs; jb++) {
2617         for (ib=0; ib<bs; ib++) {
2618           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2619           b_val++;
2620         }
2621       }
2622     }
2623   } else if (type == NORM_1) {
2624     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2625       for (jb=0; jb<bs; jb++) {
2626         for (ib=0; ib<bs; ib++) {
2627           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2628           a_val++;
2629         }
2630       }
2631     }
2632     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2633       for (jb=0; jb<bs; jb++) {
2634        for (ib=0; ib<bs; ib++) {
2635           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2636           b_val++;
2637         }
2638       }
2639     }
2640   } else if (type == NORM_INFINITY) {
2641     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2642       for (jb=0; jb<bs; jb++) {
2643         for (ib=0; ib<bs; ib++) {
2644           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2645           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2646           a_val++;
2647         }
2648       }
2649     }
2650     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2651       for (jb=0; jb<bs; jb++) {
2652         for (ib=0; ib<bs; ib++) {
2653           int col = garray[b_aij->j[i]] * bs + jb;
2654           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2655           b_val++;
2656         }
2657       }
2658     }
2659   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2660   if (type == NORM_INFINITY) {
2661     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2662   } else {
2663     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2664   }
2665   ierr = PetscFree(work);CHKERRQ(ierr);
2666   if (type == NORM_2) {
2667     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2668   }
2669   PetscFunctionReturn(0);
2670 }
2671 
2672 #undef __FUNCT__
2673 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2674 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2675 {
2676   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2677   PetscErrorCode ierr;
2678 
2679   PetscFunctionBegin;
2680   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2681   PetscFunctionReturn(0);
2682 }
2683 
2684 
2685 /* -------------------------------------------------------------------*/
2686 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2687                                        MatGetRow_MPIBAIJ,
2688                                        MatRestoreRow_MPIBAIJ,
2689                                        MatMult_MPIBAIJ,
2690                                 /* 4*/ MatMultAdd_MPIBAIJ,
2691                                        MatMultTranspose_MPIBAIJ,
2692                                        MatMultTransposeAdd_MPIBAIJ,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                 /*10*/ 0,
2697                                        0,
2698                                        0,
2699                                        MatSOR_MPIBAIJ,
2700                                        MatTranspose_MPIBAIJ,
2701                                 /*15*/ MatGetInfo_MPIBAIJ,
2702                                        MatEqual_MPIBAIJ,
2703                                        MatGetDiagonal_MPIBAIJ,
2704                                        MatDiagonalScale_MPIBAIJ,
2705                                        MatNorm_MPIBAIJ,
2706                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2707                                        MatAssemblyEnd_MPIBAIJ,
2708                                        MatSetOption_MPIBAIJ,
2709                                        MatZeroEntries_MPIBAIJ,
2710                                 /*24*/ MatZeroRows_MPIBAIJ,
2711                                        0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                 /*29*/ MatSetUp_MPIBAIJ,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                 /*34*/ MatDuplicate_MPIBAIJ,
2721                                        0,
2722                                        0,
2723                                        0,
2724                                        0,
2725                                 /*39*/ MatAXPY_MPIBAIJ,
2726                                        MatGetSubMatrices_MPIBAIJ,
2727                                        MatIncreaseOverlap_MPIBAIJ,
2728                                        MatGetValues_MPIBAIJ,
2729                                        MatCopy_MPIBAIJ,
2730                                 /*44*/ 0,
2731                                        MatScale_MPIBAIJ,
2732                                        0,
2733                                        0,
2734                                        MatZeroRowsColumns_MPIBAIJ,
2735                                 /*49*/ 0,
2736                                        0,
2737                                        0,
2738                                        0,
2739                                        0,
2740                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2741                                        0,
2742                                        MatSetUnfactored_MPIBAIJ,
2743                                        MatPermute_MPIBAIJ,
2744                                        MatSetValuesBlocked_MPIBAIJ,
2745                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2746                                        MatDestroy_MPIBAIJ,
2747                                        MatView_MPIBAIJ,
2748                                        0,
2749                                        0,
2750                                 /*64*/ 0,
2751                                        0,
2752                                        0,
2753                                        0,
2754                                        0,
2755                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2756                                        0,
2757                                        0,
2758                                        0,
2759                                        0,
2760                                 /*74*/ 0,
2761                                        MatFDColoringApply_BAIJ,
2762                                        0,
2763                                        0,
2764                                        0,
2765                                 /*79*/ 0,
2766                                        0,
2767                                        0,
2768                                        0,
2769                                        MatLoad_MPIBAIJ,
2770                                 /*84*/ 0,
2771                                        0,
2772                                        0,
2773                                        0,
2774                                        0,
2775                                 /*89*/ 0,
2776                                        0,
2777                                        0,
2778                                        0,
2779                                        0,
2780                                 /*94*/ 0,
2781                                        0,
2782                                        0,
2783                                        0,
2784                                        0,
2785                                 /*99*/ 0,
2786                                        0,
2787                                        0,
2788                                        0,
2789                                        0,
2790                                 /*104*/0,
2791                                        MatRealPart_MPIBAIJ,
2792                                        MatImaginaryPart_MPIBAIJ,
2793                                        0,
2794                                        0,
2795                                 /*109*/0,
2796                                        0,
2797                                        0,
2798                                        0,
2799                                        0,
2800                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2801                                        0,
2802                                        MatGetGhosts_MPIBAIJ,
2803                                        0,
2804                                        0,
2805                                 /*119*/0,
2806                                        0,
2807                                        0,
2808                                        0,
2809                                        MatGetMultiProcBlock_MPIBAIJ,
2810                                 /*124*/0,
2811                                        MatGetColumnNorms_MPIBAIJ,
2812                                        MatInvertBlockDiagonal_MPIBAIJ,
2813                                        0,
2814                                        0,
2815                                /*129*/ 0,
2816                                        0,
2817                                        0,
2818                                        0,
2819                                        0,
2820                                /*134*/ 0,
2821                                        0,
2822                                        0,
2823                                        0,
2824                                        0,
2825                                /*139*/ 0,
2826                                        0,
2827                                        0,
2828                                        MatFDColoringSetUp_MPIXAIJ,
2829                                        0,
2830                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2831 };
2832 
2833 #undef __FUNCT__
2834 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2835 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2836 {
2837   PetscFunctionBegin;
2838   *a = ((Mat_MPIBAIJ*)A->data)->A;
2839   PetscFunctionReturn(0);
2840 }
2841 
2842 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2843 
2844 #undef __FUNCT__
2845 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2846 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2847 {
2848   PetscInt       m,rstart,cstart,cend;
2849   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2850   const PetscInt *JJ    =0;
2851   PetscScalar    *values=0;
2852   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2853   PetscErrorCode ierr;
2854 
2855   PetscFunctionBegin;
2856   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2857   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2858   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2859   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2860   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2861   m      = B->rmap->n/bs;
2862   rstart = B->rmap->rstart/bs;
2863   cstart = B->cmap->rstart/bs;
2864   cend   = B->cmap->rend/bs;
2865 
2866   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2867   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2868   for (i=0; i<m; i++) {
2869     nz = ii[i+1] - ii[i];
2870     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2871     nz_max = PetscMax(nz_max,nz);
2872     JJ     = jj + ii[i];
2873     for (j=0; j<nz; j++) {
2874       if (*JJ >= cstart) break;
2875       JJ++;
2876     }
2877     d = 0;
2878     for (; j<nz; j++) {
2879       if (*JJ++ >= cend) break;
2880       d++;
2881     }
2882     d_nnz[i] = d;
2883     o_nnz[i] = nz - d;
2884   }
2885   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2886   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2887 
2888   values = (PetscScalar*)V;
2889   if (!values) {
2890     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2891     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2892   }
2893   for (i=0; i<m; i++) {
2894     PetscInt          row    = i + rstart;
2895     PetscInt          ncols  = ii[i+1] - ii[i];
2896     const PetscInt    *icols = jj + ii[i];
2897     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2898       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2899       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2900     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2901       PetscInt j;
2902       for (j=0; j<ncols; j++) {
2903         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2904         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2905       }
2906     }
2907   }
2908 
2909   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2910   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2911   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2912   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2913   PetscFunctionReturn(0);
2914 }
2915 
2916 #undef __FUNCT__
2917 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2918 /*@C
2919    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2920    (the default parallel PETSc format).
2921 
2922    Collective on MPI_Comm
2923 
2924    Input Parameters:
2925 +  B - the matrix
2926 .  bs - the block size
2927 .  i - the indices into j for the start of each local row (starts with zero)
2928 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2929 -  v - optional values in the matrix
2930 
2931    Level: developer
2932 
2933    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2934    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2935    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2936    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2937    block column and the second index is over columns within a block.
2938 
2939 .keywords: matrix, aij, compressed row, sparse, parallel
2940 
2941 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2942 @*/
2943 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2944 {
2945   PetscErrorCode ierr;
2946 
2947   PetscFunctionBegin;
2948   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2949   PetscValidType(B,1);
2950   PetscValidLogicalCollectiveInt(B,bs,2);
2951   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2952   PetscFunctionReturn(0);
2953 }
2954 
2955 #undef __FUNCT__
2956 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2957 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2958 {
2959   Mat_MPIBAIJ    *b;
2960   PetscErrorCode ierr;
2961   PetscInt       i;
2962 
2963   PetscFunctionBegin;
2964   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2965   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2966   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2967   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2968 
2969   if (d_nnz) {
2970     for (i=0; i<B->rmap->n/bs; i++) {
2971       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2972     }
2973   }
2974   if (o_nnz) {
2975     for (i=0; i<B->rmap->n/bs; i++) {
2976       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2977     }
2978   }
2979 
2980   b      = (Mat_MPIBAIJ*)B->data;
2981   b->bs2 = bs*bs;
2982   b->mbs = B->rmap->n/bs;
2983   b->nbs = B->cmap->n/bs;
2984   b->Mbs = B->rmap->N/bs;
2985   b->Nbs = B->cmap->N/bs;
2986 
2987   for (i=0; i<=b->size; i++) {
2988     b->rangebs[i] = B->rmap->range[i]/bs;
2989   }
2990   b->rstartbs = B->rmap->rstart/bs;
2991   b->rendbs   = B->rmap->rend/bs;
2992   b->cstartbs = B->cmap->rstart/bs;
2993   b->cendbs   = B->cmap->rend/bs;
2994 
2995   if (!B->preallocated) {
2996     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2997     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2998     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2999     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3000     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3001     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3002     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3003     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3004     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
3005   }
3006 
3007   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3008   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3009   B->preallocated = PETSC_TRUE;
3010   PetscFunctionReturn(0);
3011 }
3012 
3013 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3014 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3015 
3016 #undef __FUNCT__
3017 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3018 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3019 {
3020   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3021   PetscErrorCode ierr;
3022   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3023   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3024   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3025 
3026   PetscFunctionBegin;
3027   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
3028   ii[0] = 0;
3029   for (i=0; i<M; i++) {
3030     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3031     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3032     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3033     /* remove one from count of matrix has diagonal */
3034     for (j=id[i]; j<id[i+1]; j++) {
3035       if (jd[j] == i) {ii[i+1]--;break;}
3036     }
3037   }
3038   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
3039   cnt  = 0;
3040   for (i=0; i<M; i++) {
3041     for (j=io[i]; j<io[i+1]; j++) {
3042       if (garray[jo[j]] > rstart) break;
3043       jj[cnt++] = garray[jo[j]];
3044     }
3045     for (k=id[i]; k<id[i+1]; k++) {
3046       if (jd[k] != i) {
3047         jj[cnt++] = rstart + jd[k];
3048       }
3049     }
3050     for (; j<io[i+1]; j++) {
3051       jj[cnt++] = garray[jo[j]];
3052     }
3053   }
3054   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
3055   PetscFunctionReturn(0);
3056 }
3057 
3058 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3059 
3060 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3061 
3062 #undef __FUNCT__
3063 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3064 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3065 {
3066   PetscErrorCode ierr;
3067   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3068   Mat            B;
3069   Mat_MPIAIJ     *b;
3070 
3071   PetscFunctionBegin;
3072   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
3073 
3074   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3075   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3076   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3077   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3078   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3079   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
3080   b    = (Mat_MPIAIJ*) B->data;
3081 
3082   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3083   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3084   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3085   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3086   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3087   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3088   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3089   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3090   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3091   if (reuse == MAT_REUSE_MATRIX) {
3092     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3093   } else {
3094    *newmat = B;
3095   }
3096   PetscFunctionReturn(0);
3097 }
3098 
3099 /*MC
3100    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3101 
3102    Options Database Keys:
3103 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3104 . -mat_block_size <bs> - set the blocksize used to store the matrix
3105 - -mat_use_hash_table <fact>
3106 
3107   Level: beginner
3108 
3109 .seealso: MatCreateMPIBAIJ
3110 M*/
3111 
3112 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3113 
3114 #undef __FUNCT__
3115 #define __FUNCT__ "MatCreate_MPIBAIJ"
3116 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3117 {
3118   Mat_MPIBAIJ    *b;
3119   PetscErrorCode ierr;
3120   PetscBool      flg = PETSC_FALSE;
3121 
3122   PetscFunctionBegin;
3123   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3124   B->data = (void*)b;
3125 
3126   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3127   B->assembled = PETSC_FALSE;
3128 
3129   B->insertmode = NOT_SET_VALUES;
3130   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3131   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3132 
3133   /* build local table of row and column ownerships */
3134   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
3135 
3136   /* build cache for off array entries formed */
3137   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3138 
3139   b->donotstash  = PETSC_FALSE;
3140   b->colmap      = NULL;
3141   b->garray      = NULL;
3142   b->roworiented = PETSC_TRUE;
3143 
3144   /* stuff used in block assembly */
3145   b->barray = 0;
3146 
3147   /* stuff used for matrix vector multiply */
3148   b->lvec  = 0;
3149   b->Mvctx = 0;
3150 
3151   /* stuff for MatGetRow() */
3152   b->rowindices   = 0;
3153   b->rowvalues    = 0;
3154   b->getrowactive = PETSC_FALSE;
3155 
3156   /* hash table stuff */
3157   b->ht           = 0;
3158   b->hd           = 0;
3159   b->ht_size      = 0;
3160   b->ht_flag      = PETSC_FALSE;
3161   b->ht_fact      = 0;
3162   b->ht_total_ct  = 0;
3163   b->ht_insert_ct = 0;
3164 
3165   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3166   b->ijonly = PETSC_FALSE;
3167 
3168 
3169   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3170   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3171   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3172   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3173   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3174   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3175   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3176   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3177   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3178   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3179   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3180   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3181 
3182   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3183   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
3184   if (flg) {
3185     PetscReal fact = 1.39;
3186     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3187     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3188     if (fact <= 1.0) fact = 1.39;
3189     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3190     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3191   }
3192   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3193   PetscFunctionReturn(0);
3194 }
3195 
3196 /*MC
3197    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3198 
3199    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3200    and MATMPIBAIJ otherwise.
3201 
3202    Options Database Keys:
3203 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3204 
3205   Level: beginner
3206 
3207 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3208 M*/
3209 
3210 #undef __FUNCT__
3211 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3212 /*@C
3213    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3214    (block compressed row).  For good matrix assembly performance
3215    the user should preallocate the matrix storage by setting the parameters
3216    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3217    performance can be increased by more than a factor of 50.
3218 
3219    Collective on Mat
3220 
3221    Input Parameters:
3222 +  B - the matrix
3223 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3224           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3225 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3226            submatrix  (same for all local rows)
3227 .  d_nnz - array containing the number of block nonzeros in the various block rows
3228            of the in diagonal portion of the local (possibly different for each block
3229            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3230            set it even if it is zero.
3231 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3232            submatrix (same for all local rows).
3233 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3234            off-diagonal portion of the local submatrix (possibly different for
3235            each block row) or NULL.
3236 
3237    If the *_nnz parameter is given then the *_nz parameter is ignored
3238 
3239    Options Database Keys:
3240 +   -mat_block_size - size of the blocks to use
3241 -   -mat_use_hash_table <fact>
3242 
3243    Notes:
3244    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3245    than it must be used on all processors that share the object for that argument.
3246 
3247    Storage Information:
3248    For a square global matrix we define each processor's diagonal portion
3249    to be its local rows and the corresponding columns (a square submatrix);
3250    each processor's off-diagonal portion encompasses the remainder of the
3251    local matrix (a rectangular submatrix).
3252 
3253    The user can specify preallocated storage for the diagonal part of
3254    the local submatrix with either d_nz or d_nnz (not both).  Set
3255    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3256    memory allocation.  Likewise, specify preallocated storage for the
3257    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3258 
3259    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3260    the figure below we depict these three local rows and all columns (0-11).
3261 
3262 .vb
3263            0 1 2 3 4 5 6 7 8 9 10 11
3264           --------------------------
3265    row 3  |o o o d d d o o o o  o  o
3266    row 4  |o o o d d d o o o o  o  o
3267    row 5  |o o o d d d o o o o  o  o
3268           --------------------------
3269 .ve
3270 
3271    Thus, any entries in the d locations are stored in the d (diagonal)
3272    submatrix, and any entries in the o locations are stored in the
3273    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3274    stored simply in the MATSEQBAIJ format for compressed row storage.
3275 
3276    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3277    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3278    In general, for PDE problems in which most nonzeros are near the diagonal,
3279    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3280    or you will get TERRIBLE performance; see the users' manual chapter on
3281    matrices.
3282 
3283    You can call MatGetInfo() to get information on how effective the preallocation was;
3284    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3285    You can also run with the option -info and look for messages with the string
3286    malloc in them to see if additional memory allocation was needed.
3287 
3288    Level: intermediate
3289 
3290 .keywords: matrix, block, aij, compressed row, sparse, parallel
3291 
3292 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3293 @*/
3294 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3295 {
3296   PetscErrorCode ierr;
3297 
3298   PetscFunctionBegin;
3299   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3300   PetscValidType(B,1);
3301   PetscValidLogicalCollectiveInt(B,bs,2);
3302   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3303   PetscFunctionReturn(0);
3304 }
3305 
3306 #undef __FUNCT__
3307 #define __FUNCT__ "MatCreateBAIJ"
3308 /*@C
3309    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3310    (block compressed row).  For good matrix assembly performance
3311    the user should preallocate the matrix storage by setting the parameters
3312    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3313    performance can be increased by more than a factor of 50.
3314 
3315    Collective on MPI_Comm
3316 
3317    Input Parameters:
3318 +  comm - MPI communicator
3319 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3320           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3321 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3322            This value should be the same as the local size used in creating the
3323            y vector for the matrix-vector product y = Ax.
3324 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3325            This value should be the same as the local size used in creating the
3326            x vector for the matrix-vector product y = Ax.
3327 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3328 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3329 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3330            submatrix  (same for all local rows)
3331 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3332            of the in diagonal portion of the local (possibly different for each block
3333            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3334            and set it even if it is zero.
3335 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3336            submatrix (same for all local rows).
3337 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3338            off-diagonal portion of the local submatrix (possibly different for
3339            each block row) or NULL.
3340 
3341    Output Parameter:
3342 .  A - the matrix
3343 
3344    Options Database Keys:
3345 +   -mat_block_size - size of the blocks to use
3346 -   -mat_use_hash_table <fact>
3347 
3348    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3349    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3350    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3351 
3352    Notes:
3353    If the *_nnz parameter is given then the *_nz parameter is ignored
3354 
3355    A nonzero block is any block that as 1 or more nonzeros in it
3356 
3357    The user MUST specify either the local or global matrix dimensions
3358    (possibly both).
3359 
3360    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3361    than it must be used on all processors that share the object for that argument.
3362 
3363    Storage Information:
3364    For a square global matrix we define each processor's diagonal portion
3365    to be its local rows and the corresponding columns (a square submatrix);
3366    each processor's off-diagonal portion encompasses the remainder of the
3367    local matrix (a rectangular submatrix).
3368 
3369    The user can specify preallocated storage for the diagonal part of
3370    the local submatrix with either d_nz or d_nnz (not both).  Set
3371    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3372    memory allocation.  Likewise, specify preallocated storage for the
3373    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3374 
3375    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3376    the figure below we depict these three local rows and all columns (0-11).
3377 
3378 .vb
3379            0 1 2 3 4 5 6 7 8 9 10 11
3380           --------------------------
3381    row 3  |o o o d d d o o o o  o  o
3382    row 4  |o o o d d d o o o o  o  o
3383    row 5  |o o o d d d o o o o  o  o
3384           --------------------------
3385 .ve
3386 
3387    Thus, any entries in the d locations are stored in the d (diagonal)
3388    submatrix, and any entries in the o locations are stored in the
3389    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3390    stored simply in the MATSEQBAIJ format for compressed row storage.
3391 
3392    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3393    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3394    In general, for PDE problems in which most nonzeros are near the diagonal,
3395    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3396    or you will get TERRIBLE performance; see the users' manual chapter on
3397    matrices.
3398 
3399    Level: intermediate
3400 
3401 .keywords: matrix, block, aij, compressed row, sparse, parallel
3402 
3403 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3404 @*/
3405 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3406 {
3407   PetscErrorCode ierr;
3408   PetscMPIInt    size;
3409 
3410   PetscFunctionBegin;
3411   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3412   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3413   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3414   if (size > 1) {
3415     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3416     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3417   } else {
3418     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3419     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3420   }
3421   PetscFunctionReturn(0);
3422 }
3423 
3424 #undef __FUNCT__
3425 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3426 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3427 {
3428   Mat            mat;
3429   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3430   PetscErrorCode ierr;
3431   PetscInt       len=0;
3432 
3433   PetscFunctionBegin;
3434   *newmat = 0;
3435   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3436   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3437   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3438   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3439 
3440   mat->factortype   = matin->factortype;
3441   mat->preallocated = PETSC_TRUE;
3442   mat->assembled    = PETSC_TRUE;
3443   mat->insertmode   = NOT_SET_VALUES;
3444 
3445   a             = (Mat_MPIBAIJ*)mat->data;
3446   mat->rmap->bs = matin->rmap->bs;
3447   a->bs2        = oldmat->bs2;
3448   a->mbs        = oldmat->mbs;
3449   a->nbs        = oldmat->nbs;
3450   a->Mbs        = oldmat->Mbs;
3451   a->Nbs        = oldmat->Nbs;
3452 
3453   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3454   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3455 
3456   a->size         = oldmat->size;
3457   a->rank         = oldmat->rank;
3458   a->donotstash   = oldmat->donotstash;
3459   a->roworiented  = oldmat->roworiented;
3460   a->rowindices   = 0;
3461   a->rowvalues    = 0;
3462   a->getrowactive = PETSC_FALSE;
3463   a->barray       = 0;
3464   a->rstartbs     = oldmat->rstartbs;
3465   a->rendbs       = oldmat->rendbs;
3466   a->cstartbs     = oldmat->cstartbs;
3467   a->cendbs       = oldmat->cendbs;
3468 
3469   /* hash table stuff */
3470   a->ht           = 0;
3471   a->hd           = 0;
3472   a->ht_size      = 0;
3473   a->ht_flag      = oldmat->ht_flag;
3474   a->ht_fact      = oldmat->ht_fact;
3475   a->ht_total_ct  = 0;
3476   a->ht_insert_ct = 0;
3477 
3478   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3479   if (oldmat->colmap) {
3480 #if defined(PETSC_USE_CTABLE)
3481     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3482 #else
3483     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
3484     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3485     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3486 #endif
3487   } else a->colmap = 0;
3488 
3489   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3490     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3491     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3492     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3493   } else a->garray = 0;
3494 
3495   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3496   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3497   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3498   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3499   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3500 
3501   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3502   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3503   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3504   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3505   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3506   *newmat = mat;
3507   PetscFunctionReturn(0);
3508 }
3509 
3510 #undef __FUNCT__
3511 #define __FUNCT__ "MatLoad_MPIBAIJ"
3512 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3513 {
3514   PetscErrorCode ierr;
3515   int            fd;
3516   PetscInt       i,nz,j,rstart,rend;
3517   PetscScalar    *vals,*buf;
3518   MPI_Comm       comm;
3519   MPI_Status     status;
3520   PetscMPIInt    rank,size,maxnz;
3521   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3522   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3523   PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
3524   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3525   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3526   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3527 
3528   PetscFunctionBegin;
3529   /* force binary viewer to load .info file if it has not yet done so */
3530   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3531   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3532   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3533   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3534   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3535   if (bs < 0) bs = 1;
3536 
3537   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3538   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3539   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3540   if (!rank) {
3541     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3542     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3543   }
3544 
3545   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3546 
3547   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3548   M    = header[1]; N = header[2];
3549 
3550   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3551   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3552   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3553 
3554   /* If global sizes are set, check if they are consistent with that given in the file */
3555   if (sizesset) {
3556     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3557   }
3558   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3559   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3560 
3561   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3562 
3563   /*
3564      This code adds extra rows to make sure the number of rows is
3565      divisible by the blocksize
3566   */
3567   Mbs        = M/bs;
3568   extra_rows = bs - M + bs*Mbs;
3569   if (extra_rows == bs) extra_rows = 0;
3570   else                  Mbs++;
3571   if (extra_rows && !rank) {
3572     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3573   }
3574 
3575   /* determine ownership of all rows */
3576   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3577     mbs = Mbs/size + ((Mbs % size) > rank);
3578     m   = mbs*bs;
3579   } else { /* User set */
3580     m   = newmat->rmap->n;
3581     mbs = m/bs;
3582   }
3583   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3584   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3585 
3586   /* process 0 needs enough room for process with most rows */
3587   if (!rank) {
3588     mmax = rowners[1];
3589     for (i=2; i<=size; i++) {
3590       mmax = PetscMax(mmax,rowners[i]);
3591     }
3592     mmax*=bs;
3593   } else mmax = -1;             /* unused, but compiler warns anyway */
3594 
3595   rowners[0] = 0;
3596   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3597   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3598   rstart = rowners[rank];
3599   rend   = rowners[rank+1];
3600 
3601   /* distribute row lengths to all processors */
3602   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3603   if (!rank) {
3604     mend = m;
3605     if (size == 1) mend = mend - extra_rows;
3606     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3607     for (j=mend; j<m; j++) locrowlens[j] = 1;
3608     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3609     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3610     for (j=0; j<m; j++) {
3611       procsnz[0] += locrowlens[j];
3612     }
3613     for (i=1; i<size; i++) {
3614       mend = browners[i+1] - browners[i];
3615       if (i == size-1) mend = mend - extra_rows;
3616       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3617       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3618       /* calculate the number of nonzeros on each processor */
3619       for (j=0; j<browners[i+1]-browners[i]; j++) {
3620         procsnz[i] += rowlengths[j];
3621       }
3622       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3623     }
3624     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3625   } else {
3626     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3627   }
3628 
3629   if (!rank) {
3630     /* determine max buffer needed and allocate it */
3631     maxnz = procsnz[0];
3632     for (i=1; i<size; i++) {
3633       maxnz = PetscMax(maxnz,procsnz[i]);
3634     }
3635     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3636 
3637     /* read in my part of the matrix column indices  */
3638     nz     = procsnz[0];
3639     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3640     mycols = ibuf;
3641     if (size == 1) nz -= extra_rows;
3642     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3643     if (size == 1) {
3644       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3645     }
3646 
3647     /* read in every ones (except the last) and ship off */
3648     for (i=1; i<size-1; i++) {
3649       nz   = procsnz[i];
3650       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3651       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3652     }
3653     /* read in the stuff for the last proc */
3654     if (size != 1) {
3655       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3656       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3657       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3658       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3659     }
3660     ierr = PetscFree(cols);CHKERRQ(ierr);
3661   } else {
3662     /* determine buffer space needed for message */
3663     nz = 0;
3664     for (i=0; i<m; i++) {
3665       nz += locrowlens[i];
3666     }
3667     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3668     mycols = ibuf;
3669     /* receive message of column indices*/
3670     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3671     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3672     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3673   }
3674 
3675   /* loop over local rows, determining number of off diagonal entries */
3676   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3677   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3678   rowcount = 0; nzcount = 0;
3679   for (i=0; i<mbs; i++) {
3680     dcount  = 0;
3681     odcount = 0;
3682     for (j=0; j<bs; j++) {
3683       kmax = locrowlens[rowcount];
3684       for (k=0; k<kmax; k++) {
3685         tmp = mycols[nzcount++]/bs;
3686         if (!mask[tmp]) {
3687           mask[tmp] = 1;
3688           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3689           else masked1[dcount++] = tmp;
3690         }
3691       }
3692       rowcount++;
3693     }
3694 
3695     dlens[i]  = dcount;
3696     odlens[i] = odcount;
3697 
3698     /* zero out the mask elements we set */
3699     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3700     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3701   }
3702 
3703 
3704   if (!sizesset) {
3705     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3706   }
3707   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3708 
3709   if (!rank) {
3710     ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
3711     /* read in my part of the matrix numerical values  */
3712     nz     = procsnz[0];
3713     vals   = buf;
3714     mycols = ibuf;
3715     if (size == 1) nz -= extra_rows;
3716     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3717     if (size == 1) {
3718       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3719     }
3720 
3721     /* insert into matrix */
3722     jj = rstart*bs;
3723     for (i=0; i<m; i++) {
3724       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3725       mycols += locrowlens[i];
3726       vals   += locrowlens[i];
3727       jj++;
3728     }
3729     /* read in other processors (except the last one) and ship out */
3730     for (i=1; i<size-1; i++) {
3731       nz   = procsnz[i];
3732       vals = buf;
3733       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3734       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3735     }
3736     /* the last proc */
3737     if (size != 1) {
3738       nz   = procsnz[i] - extra_rows;
3739       vals = buf;
3740       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3741       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3742       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3743     }
3744     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3745   } else {
3746     /* receive numeric values */
3747     ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);
3748 
3749     /* receive message of values*/
3750     vals   = buf;
3751     mycols = ibuf;
3752     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3753 
3754     /* insert into matrix */
3755     jj = rstart*bs;
3756     for (i=0; i<m; i++) {
3757       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3758       mycols += locrowlens[i];
3759       vals   += locrowlens[i];
3760       jj++;
3761     }
3762   }
3763   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3764   ierr = PetscFree(buf);CHKERRQ(ierr);
3765   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3766   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3767   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3768   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3769   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3770   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3771   PetscFunctionReturn(0);
3772 }
3773 
3774 #undef __FUNCT__
3775 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3776 /*@
3777    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3778 
3779    Input Parameters:
3780 .  mat  - the matrix
3781 .  fact - factor
3782 
3783    Not Collective, each process can use a different factor
3784 
3785    Level: advanced
3786 
3787   Notes:
3788    This can also be set by the command line option: -mat_use_hash_table <fact>
3789 
3790 .keywords: matrix, hashtable, factor, HT
3791 
3792 .seealso: MatSetOption()
3793 @*/
3794 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3795 {
3796   PetscErrorCode ierr;
3797 
3798   PetscFunctionBegin;
3799   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3800   PetscFunctionReturn(0);
3801 }
3802 
3803 #undef __FUNCT__
3804 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3805 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3806 {
3807   Mat_MPIBAIJ *baij;
3808 
3809   PetscFunctionBegin;
3810   baij          = (Mat_MPIBAIJ*)mat->data;
3811   baij->ht_fact = fact;
3812   PetscFunctionReturn(0);
3813 }
3814 
3815 #undef __FUNCT__
3816 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3817 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3818 {
3819   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3820 
3821   PetscFunctionBegin;
3822   if (Ad)     *Ad     = a->A;
3823   if (Ao)     *Ao     = a->B;
3824   if (colmap) *colmap = a->garray;
3825   PetscFunctionReturn(0);
3826 }
3827 
3828 /*
3829     Special version for direct calls from Fortran (to eliminate two function call overheads
3830 */
3831 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3832 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3833 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3834 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3835 #endif
3836 
3837 #undef __FUNCT__
3838 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3839 /*@C
3840   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3841 
3842   Collective on Mat
3843 
3844   Input Parameters:
3845 + mat - the matrix
3846 . min - number of input rows
3847 . im - input rows
3848 . nin - number of input columns
3849 . in - input columns
3850 . v - numerical values input
3851 - addvin - INSERT_VALUES or ADD_VALUES
3852 
3853   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3854 
3855   Level: advanced
3856 
3857 .seealso:   MatSetValuesBlocked()
3858 @*/
3859 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3860 {
3861   /* convert input arguments to C version */
3862   Mat        mat  = *matin;
3863   PetscInt   m    = *min, n = *nin;
3864   InsertMode addv = *addvin;
3865 
3866   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3867   const MatScalar *value;
3868   MatScalar       *barray     = baij->barray;
3869   PetscBool       roworiented = baij->roworiented;
3870   PetscErrorCode  ierr;
3871   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3872   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3873   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3874 
3875   PetscFunctionBegin;
3876   /* tasks normally handled by MatSetValuesBlocked() */
3877   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3878 #if defined(PETSC_USE_DEBUG)
3879   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3880   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3881 #endif
3882   if (mat->assembled) {
3883     mat->was_assembled = PETSC_TRUE;
3884     mat->assembled     = PETSC_FALSE;
3885   }
3886   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3887 
3888 
3889   if (!barray) {
3890     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3891     baij->barray = barray;
3892   }
3893 
3894   if (roworiented) stepval = (n-1)*bs;
3895   else stepval = (m-1)*bs;
3896 
3897   for (i=0; i<m; i++) {
3898     if (im[i] < 0) continue;
3899 #if defined(PETSC_USE_DEBUG)
3900     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3901 #endif
3902     if (im[i] >= rstart && im[i] < rend) {
3903       row = im[i] - rstart;
3904       for (j=0; j<n; j++) {
3905         /* If NumCol = 1 then a copy is not required */
3906         if ((roworiented) && (n == 1)) {
3907           barray = (MatScalar*)v + i*bs2;
3908         } else if ((!roworiented) && (m == 1)) {
3909           barray = (MatScalar*)v + j*bs2;
3910         } else { /* Here a copy is required */
3911           if (roworiented) {
3912             value = v + i*(stepval+bs)*bs + j*bs;
3913           } else {
3914             value = v + j*(stepval+bs)*bs + i*bs;
3915           }
3916           for (ii=0; ii<bs; ii++,value+=stepval) {
3917             for (jj=0; jj<bs; jj++) {
3918               *barray++ = *value++;
3919             }
3920           }
3921           barray -=bs2;
3922         }
3923 
3924         if (in[j] >= cstart && in[j] < cend) {
3925           col  = in[j] - cstart;
3926           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,1,&row,1,&col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3927         } else if (in[j] < 0) continue;
3928 #if defined(PETSC_USE_DEBUG)
3929         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3930 #endif
3931         else {
3932           if (mat->was_assembled) {
3933             if (!baij->colmap) {
3934               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3935             }
3936 
3937 #if defined(PETSC_USE_DEBUG)
3938 #if defined(PETSC_USE_CTABLE)
3939             { PetscInt data;
3940               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3941               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3942             }
3943 #else
3944             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3945 #endif
3946 #endif
3947 #if defined(PETSC_USE_CTABLE)
3948             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3949             col  = (col - 1)/bs;
3950 #else
3951             col = (baij->colmap[in[j]] - 1)/bs;
3952 #endif
3953             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3954               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3955               col  =  in[j];
3956             }
3957           } else col = in[j];
3958           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,1,&row,1,&col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3959         }
3960       }
3961     } else {
3962       if (!baij->donotstash) {
3963         if (roworiented) {
3964           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3965         } else {
3966           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3967         }
3968       }
3969     }
3970   }
3971 
3972   /* task normally handled by MatSetValuesBlocked() */
3973   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3974   PetscFunctionReturn(0);
3975 }
3976 
3977 #undef __FUNCT__
3978 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3979 /*@
3980      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
3981          CSR format the local rows.
3982 
3983    Collective on MPI_Comm
3984 
3985    Input Parameters:
3986 +  comm - MPI communicator
3987 .  bs - the block size, only a block size of 1 is supported
3988 .  m - number of local rows (Cannot be PETSC_DECIDE)
3989 .  n - This value should be the same as the local size used in creating the
3990        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3991        calculated if N is given) For square matrices n is almost always m.
3992 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3993 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3994 .   i - row indices
3995 .   j - column indices
3996 -   a - matrix values
3997 
3998    Output Parameter:
3999 .   mat - the matrix
4000 
4001    Level: intermediate
4002 
4003    Notes:
4004        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4005      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4006      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4007 
4008      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
4009      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
4010      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
4011      with column-major ordering within blocks.
4012 
4013        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4014 
4015 .keywords: matrix, aij, compressed row, sparse, parallel
4016 
4017 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4018           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4019 @*/
4020 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4021 {
4022   PetscErrorCode ierr;
4023 
4024   PetscFunctionBegin;
4025   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4026   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4027   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4028   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4029   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
4030   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4031   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4032   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4033   PetscFunctionReturn(0);
4034 }
4035 
4036 #undef __FUNCT__
4037 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ"
4038 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4039 {
4040   PetscErrorCode ierr;
4041   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
4042   PetscInt       *indx;
4043   PetscScalar    *values;
4044 
4045   PetscFunctionBegin;
4046   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4047   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4048     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
4049     PetscInt       *dnz,*onz,sum,mbs,Nbs;
4050     PetscInt       *bindx,rmax=a->rmax,j;
4051 
4052     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4053     mbs = m/bs; Nbs = N/cbs;
4054     if (n == PETSC_DECIDE) {
4055       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
4056     }
4057     /* Check sum(n) = Nbs */
4058     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4059     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
4060 
4061     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4062     rstart -= mbs;
4063 
4064     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
4065     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
4066     for (i=0; i<mbs; i++) {
4067       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
4068       nnz = nnz/bs;
4069       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
4070       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
4071       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
4072     }
4073     ierr = PetscFree(bindx);CHKERRQ(ierr);
4074 
4075     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4076     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4077     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4078     ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr);
4079     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
4080     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4081   }
4082 
4083   /* numeric phase */
4084   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4085   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4086 
4087   for (i=0; i<m; i++) {
4088     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4089     Ii   = i + rstart;
4090     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4091     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4092   }
4093   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4094   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4095   PetscFunctionReturn(0);
4096 }
4097