xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 4c7a3774e977609a1361fb32e8880e2ca3fb0834)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
11 {
12   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
17   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
18   PetscFunctionReturn(0);
19 }
20 
21 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
22 {
23   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
28   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
29   PetscFunctionReturn(0);
30 }
31 
32 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
33   { \
34  \
35     brow = row/bs;  \
36     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
37     rmax = aimax[brow]; nrow = ailen[brow]; \
38     bcol = col/bs; \
39     ridx = row % bs; cidx = col % bs; \
40     low  = 0; high = nrow; \
41     while (high-low > 3) { \
42       t = (low+high)/2; \
43       if (rp[t] > bcol) high = t; \
44       else              low  = t; \
45     } \
46     for (_i=low; _i<high; _i++) { \
47       if (rp[_i] > bcol) break; \
48       if (rp[_i] == bcol) { \
49         bap = ap + bs2*_i + bs*cidx + ridx; \
50         if (addv == ADD_VALUES) *bap += value;  \
51         else                    *bap  = value;  \
52         goto a_noinsert; \
53       } \
54     } \
55     if (a->nonew == 1) goto a_noinsert; \
56     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
57     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
58     N = nrow++ - 1;  \
59     /* shift up all the later entries in this row */ \
60     for (ii=N; ii>=_i; ii--) { \
61       rp[ii+1] = rp[ii]; \
62       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
63     } \
64     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
65     rp[_i]                      = bcol;  \
66     ap[bs2*_i + bs*cidx + ridx] = value;  \
67     A->nonzerostate++;\
68 a_noinsert:; \
69     ailen[brow] = nrow; \
70   }
71 
72 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
73   { \
74     brow = row/bs;  \
75     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
76     rmax = bimax[brow]; nrow = bilen[brow]; \
77     bcol = col/bs; \
78     ridx = row % bs; cidx = col % bs; \
79     low  = 0; high = nrow; \
80     while (high-low > 3) { \
81       t = (low+high)/2; \
82       if (rp[t] > bcol) high = t; \
83       else              low  = t; \
84     } \
85     for (_i=low; _i<high; _i++) { \
86       if (rp[_i] > bcol) break; \
87       if (rp[_i] == bcol) { \
88         bap = ap + bs2*_i + bs*cidx + ridx; \
89         if (addv == ADD_VALUES) *bap += value;  \
90         else                    *bap  = value;  \
91         goto b_noinsert; \
92       } \
93     } \
94     if (b->nonew == 1) goto b_noinsert; \
95     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
96     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
97     N = nrow++ - 1;  \
98     /* shift up all the later entries in this row */ \
99     for (ii=N; ii>=_i; ii--) { \
100       rp[ii+1] = rp[ii]; \
101       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
102     } \
103     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
104     rp[_i]                      = bcol;  \
105     ap[bs2*_i + bs*cidx + ridx] = value;  \
106     B->nonzerostate++;\
107 b_noinsert:; \
108     bilen[brow] = nrow; \
109   }
110 
111 /* Only add/insert a(i,j) with i<=j (blocks).
112    Any a(i,j) with i>j input by user is ingored.
113 */
114 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
115 {
116   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
117   MatScalar      value;
118   PetscBool      roworiented = baij->roworiented;
119   PetscErrorCode ierr;
120   PetscInt       i,j,row,col;
121   PetscInt       rstart_orig=mat->rmap->rstart;
122   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
123   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
124 
125   /* Some Variables required in the macro */
126   Mat          A     = baij->A;
127   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
128   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
129   MatScalar    *aa   =a->a;
130 
131   Mat         B     = baij->B;
132   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
133   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
134   MatScalar   *ba   =b->a;
135 
136   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
137   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
138   MatScalar *ap,*bap;
139 
140   /* for stash */
141   PetscInt  n_loc, *in_loc = NULL;
142   MatScalar *v_loc = NULL;
143 
144   PetscFunctionBegin;
145   if (!baij->donotstash) {
146     if (n > baij->n_loc) {
147       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
148       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
149       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
150       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
151 
152       baij->n_loc = n;
153     }
154     in_loc = baij->in_loc;
155     v_loc  = baij->v_loc;
156   }
157 
158   for (i=0; i<m; i++) {
159     if (im[i] < 0) continue;
160 #if defined(PETSC_USE_DEBUG)
161     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
162 #endif
163     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
164       row = im[i] - rstart_orig;              /* local row index */
165       for (j=0; j<n; j++) {
166         if (im[i]/bs > in[j]/bs) {
167           if (a->ignore_ltriangular) {
168             continue;    /* ignore lower triangular blocks */
169           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
170         }
171         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
172           col  = in[j] - cstart_orig;         /* local col index */
173           brow = row/bs; bcol = col/bs;
174           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
175           if (roworiented) value = v[i*n+j];
176           else             value = v[i+j*m];
177           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
178           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
179         } else if (in[j] < 0) continue;
180 #if defined(PETSC_USE_DEBUG)
181         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
182 #endif
183         else {  /* off-diag entry (B) */
184           if (mat->was_assembled) {
185             if (!baij->colmap) {
186               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
187             }
188 #if defined(PETSC_USE_CTABLE)
189             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
190             col  = col - 1;
191 #else
192             col = baij->colmap[in[j]/bs] - 1;
193 #endif
194             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
195               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
196               col  =  in[j];
197               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
198               B    = baij->B;
199               b    = (Mat_SeqBAIJ*)(B)->data;
200               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
201               ba   = b->a;
202             } else col += in[j]%bs;
203           } else col = in[j];
204           if (roworiented) value = v[i*n+j];
205           else             value = v[i+j*m];
206           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
207           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
208         }
209       }
210     } else {  /* off processor entry */
211       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
212       if (!baij->donotstash) {
213         mat->assembled = PETSC_FALSE;
214         n_loc          = 0;
215         for (j=0; j<n; j++) {
216           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
217           in_loc[n_loc] = in[j];
218           if (roworiented) {
219             v_loc[n_loc] = v[i*n+j];
220           } else {
221             v_loc[n_loc] = v[j*m+i];
222           }
223           n_loc++;
224         }
225         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
226       }
227     }
228   }
229   PetscFunctionReturn(0);
230 }
231 
232 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
233 {
234   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
235   PetscErrorCode    ierr;
236   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
237   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
238   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
239   PetscBool         roworiented=a->roworiented;
240   const PetscScalar *value     = v;
241   MatScalar         *ap,*aa = a->a,*bap;
242 
243   PetscFunctionBegin;
244   if (col < row) {
245     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
246     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
247   }
248   rp   = aj + ai[row];
249   ap   = aa + bs2*ai[row];
250   rmax = imax[row];
251   nrow = ailen[row];
252   value = v;
253   low   = 0;
254   high  = nrow;
255 
256   while (high-low > 7) {
257     t = (low+high)/2;
258     if (rp[t] > col) high = t;
259     else             low  = t;
260   }
261   for (i=low; i<high; i++) {
262     if (rp[i] > col) break;
263     if (rp[i] == col) {
264       bap = ap +  bs2*i;
265       if (roworiented) {
266         if (is == ADD_VALUES) {
267           for (ii=0; ii<bs; ii++) {
268             for (jj=ii; jj<bs2; jj+=bs) {
269               bap[jj] += *value++;
270             }
271           }
272         } else {
273           for (ii=0; ii<bs; ii++) {
274             for (jj=ii; jj<bs2; jj+=bs) {
275               bap[jj] = *value++;
276             }
277           }
278         }
279       } else {
280         if (is == ADD_VALUES) {
281           for (ii=0; ii<bs; ii++) {
282             for (jj=0; jj<bs; jj++) {
283               *bap++ += *value++;
284             }
285           }
286         } else {
287           for (ii=0; ii<bs; ii++) {
288             for (jj=0; jj<bs; jj++) {
289               *bap++  = *value++;
290             }
291           }
292         }
293       }
294       goto noinsert2;
295     }
296   }
297   if (nonew == 1) goto noinsert2;
298   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
299   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
300   N = nrow++ - 1; high++;
301   /* shift up all the later entries in this row */
302   for (ii=N; ii>=i; ii--) {
303     rp[ii+1] = rp[ii];
304     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
305   }
306   if (N >= i) {
307     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
308   }
309   rp[i] = col;
310   bap   = ap +  bs2*i;
311   if (roworiented) {
312     for (ii=0; ii<bs; ii++) {
313       for (jj=ii; jj<bs2; jj+=bs) {
314         bap[jj] = *value++;
315       }
316     }
317   } else {
318     for (ii=0; ii<bs; ii++) {
319       for (jj=0; jj<bs; jj++) {
320         *bap++ = *value++;
321       }
322     }
323   }
324   noinsert2:;
325   ailen[row] = nrow;
326   PetscFunctionReturn(0);
327 }
328 
329 /*
330    This routine is exactly duplicated in mpibaij.c
331 */
332 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
333 {
334   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
335   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
336   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
337   PetscErrorCode    ierr;
338   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
339   PetscBool         roworiented=a->roworiented;
340   const PetscScalar *value     = v;
341   MatScalar         *ap,*aa = a->a,*bap;
342 
343   PetscFunctionBegin;
344   rp   = aj + ai[row];
345   ap   = aa + bs2*ai[row];
346   rmax = imax[row];
347   nrow = ailen[row];
348   low  = 0;
349   high = nrow;
350   value = v;
351   while (high-low > 7) {
352     t = (low+high)/2;
353     if (rp[t] > col) high = t;
354     else             low  = t;
355   }
356   for (i=low; i<high; i++) {
357     if (rp[i] > col) break;
358     if (rp[i] == col) {
359       bap = ap +  bs2*i;
360       if (roworiented) {
361         if (is == ADD_VALUES) {
362           for (ii=0; ii<bs; ii++) {
363             for (jj=ii; jj<bs2; jj+=bs) {
364               bap[jj] += *value++;
365             }
366           }
367         } else {
368           for (ii=0; ii<bs; ii++) {
369             for (jj=ii; jj<bs2; jj+=bs) {
370               bap[jj] = *value++;
371             }
372           }
373         }
374       } else {
375         if (is == ADD_VALUES) {
376           for (ii=0; ii<bs; ii++,value+=bs) {
377             for (jj=0; jj<bs; jj++) {
378               bap[jj] += value[jj];
379             }
380             bap += bs;
381           }
382         } else {
383           for (ii=0; ii<bs; ii++,value+=bs) {
384             for (jj=0; jj<bs; jj++) {
385               bap[jj]  = value[jj];
386             }
387             bap += bs;
388           }
389         }
390       }
391       goto noinsert2;
392     }
393   }
394   if (nonew == 1) goto noinsert2;
395   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
396   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
397   N = nrow++ - 1; high++;
398   /* shift up all the later entries in this row */
399   for (ii=N; ii>=i; ii--) {
400     rp[ii+1] = rp[ii];
401     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
402   }
403   if (N >= i) {
404     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
405   }
406   rp[i] = col;
407   bap   = ap +  bs2*i;
408   if (roworiented) {
409     for (ii=0; ii<bs; ii++) {
410       for (jj=ii; jj<bs2; jj+=bs) {
411         bap[jj] = *value++;
412       }
413     }
414   } else {
415     for (ii=0; ii<bs; ii++) {
416       for (jj=0; jj<bs; jj++) {
417         *bap++ = *value++;
418       }
419     }
420   }
421   noinsert2:;
422   ailen[row] = nrow;
423   PetscFunctionReturn(0);
424 }
425 
426 /*
427     This routine could be optimized by removing the need for the block copy below and passing stride information
428   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
429 */
430 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
431 {
432   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
433   const MatScalar *value;
434   MatScalar       *barray     =baij->barray;
435   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
436   PetscErrorCode  ierr;
437   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
438   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
439   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
440 
441   PetscFunctionBegin;
442   if (!barray) {
443     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
444     baij->barray = barray;
445   }
446 
447   if (roworiented) {
448     stepval = (n-1)*bs;
449   } else {
450     stepval = (m-1)*bs;
451   }
452   for (i=0; i<m; i++) {
453     if (im[i] < 0) continue;
454 #if defined(PETSC_USE_DEBUG)
455     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
456 #endif
457     if (im[i] >= rstart && im[i] < rend) {
458       row = im[i] - rstart;
459       for (j=0; j<n; j++) {
460         if (im[i] > in[j]) {
461           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
462           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
463         }
464         /* If NumCol = 1 then a copy is not required */
465         if ((roworiented) && (n == 1)) {
466           barray = (MatScalar*) v + i*bs2;
467         } else if ((!roworiented) && (m == 1)) {
468           barray = (MatScalar*) v + j*bs2;
469         } else { /* Here a copy is required */
470           if (roworiented) {
471             value = v + i*(stepval+bs)*bs + j*bs;
472           } else {
473             value = v + j*(stepval+bs)*bs + i*bs;
474           }
475           for (ii=0; ii<bs; ii++,value+=stepval) {
476             for (jj=0; jj<bs; jj++) {
477               *barray++ = *value++;
478             }
479           }
480           barray -=bs2;
481         }
482 
483         if (in[j] >= cstart && in[j] < cend) {
484           col  = in[j] - cstart;
485           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
486         } else if (in[j] < 0) continue;
487 #if defined(PETSC_USE_DEBUG)
488         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
489 #endif
490         else {
491           if (mat->was_assembled) {
492             if (!baij->colmap) {
493               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
494             }
495 
496 #if defined(PETSC_USE_DEBUG)
497 #if defined(PETSC_USE_CTABLE)
498             { PetscInt data;
499               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
500               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
501             }
502 #else
503             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
504 #endif
505 #endif
506 #if defined(PETSC_USE_CTABLE)
507             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
508             col  = (col - 1)/bs;
509 #else
510             col = (baij->colmap[in[j]] - 1)/bs;
511 #endif
512             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
513               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
514               col  = in[j];
515             }
516           } else col = in[j];
517           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
518         }
519       }
520     } else {
521       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
522       if (!baij->donotstash) {
523         if (roworiented) {
524           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
525         } else {
526           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
527         }
528       }
529     }
530   }
531   PetscFunctionReturn(0);
532 }
533 
534 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
535 {
536   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
537   PetscErrorCode ierr;
538   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
539   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
540 
541   PetscFunctionBegin;
542   for (i=0; i<m; i++) {
543     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
544     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
545     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
546       row = idxm[i] - bsrstart;
547       for (j=0; j<n; j++) {
548         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
549         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
550         if (idxn[j] >= bscstart && idxn[j] < bscend) {
551           col  = idxn[j] - bscstart;
552           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
553         } else {
554           if (!baij->colmap) {
555             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
556           }
557 #if defined(PETSC_USE_CTABLE)
558           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
559           data--;
560 #else
561           data = baij->colmap[idxn[j]/bs]-1;
562 #endif
563           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
564           else {
565             col  = data + idxn[j]%bs;
566             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567           }
568         }
569       }
570     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
571   }
572   PetscFunctionReturn(0);
573 }
574 
575 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
576 {
577   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
578   PetscErrorCode ierr;
579   PetscReal      sum[2],*lnorm2;
580 
581   PetscFunctionBegin;
582   if (baij->size == 1) {
583     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
584   } else {
585     if (type == NORM_FROBENIUS) {
586       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
587       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
588       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
589       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
590       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
591       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
592       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
593       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
594     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
595       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
596       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
597       PetscReal    *rsum,*rsum2,vabs;
598       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
599       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
600       MatScalar    *v;
601 
602       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
603       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
604       /* Amat */
605       v = amat->a; jj = amat->j;
606       for (brow=0; brow<mbs; brow++) {
607         grow = bs*(rstart + brow);
608         nz   = amat->i[brow+1] - amat->i[brow];
609         for (bcol=0; bcol<nz; bcol++) {
610           gcol = bs*(rstart + *jj); jj++;
611           for (col=0; col<bs; col++) {
612             for (row=0; row<bs; row++) {
613               vabs            = PetscAbsScalar(*v); v++;
614               rsum[gcol+col] += vabs;
615               /* non-diagonal block */
616               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
617             }
618           }
619         }
620         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
621       }
622       /* Bmat */
623       v = bmat->a; jj = bmat->j;
624       for (brow=0; brow<mbs; brow++) {
625         grow = bs*(rstart + brow);
626         nz = bmat->i[brow+1] - bmat->i[brow];
627         for (bcol=0; bcol<nz; bcol++) {
628           gcol = bs*garray[*jj]; jj++;
629           for (col=0; col<bs; col++) {
630             for (row=0; row<bs; row++) {
631               vabs            = PetscAbsScalar(*v); v++;
632               rsum[gcol+col] += vabs;
633               rsum[grow+row] += vabs;
634             }
635           }
636         }
637         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
638       }
639       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640       *norm = 0.0;
641       for (col=0; col<mat->cmap->N; col++) {
642         if (rsum2[col] > *norm) *norm = rsum2[col];
643       }
644       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
645     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
646   }
647   PetscFunctionReturn(0);
648 }
649 
650 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
651 {
652   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
653   PetscErrorCode ierr;
654   PetscInt       nstash,reallocs;
655 
656   PetscFunctionBegin;
657   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
658 
659   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
660   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
664   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
665   PetscFunctionReturn(0);
666 }
667 
668 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
671   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
672   PetscErrorCode ierr;
673   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
674   PetscInt       *row,*col;
675   PetscBool      other_disassembled;
676   PetscMPIInt    n;
677   PetscBool      r1,r2,r3;
678   MatScalar      *val;
679 
680   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
681   PetscFunctionBegin;
682   if (!baij->donotstash &&  !mat->nooffprocentries) {
683     while (1) {
684       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
685       if (!flg) break;
686 
687       for (i=0; i<n;) {
688         /* Now identify the consecutive vals belonging to the same row */
689         for (j=i,rstart=row[j]; j<n; j++) {
690           if (row[j] != rstart) break;
691         }
692         if (j < n) ncols = j-i;
693         else       ncols = n-i;
694         /* Now assemble all these values with a single function call */
695         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
696         i    = j;
697       }
698     }
699     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
700     /* Now process the block-stash. Since the values are stashed column-oriented,
701        set the roworiented flag to column oriented, and after MatSetValues()
702        restore the original flags */
703     r1 = baij->roworiented;
704     r2 = a->roworiented;
705     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
706 
707     baij->roworiented = PETSC_FALSE;
708     a->roworiented    = PETSC_FALSE;
709 
710     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
711     while (1) {
712       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
713       if (!flg) break;
714 
715       for (i=0; i<n;) {
716         /* Now identify the consecutive vals belonging to the same row */
717         for (j=i,rstart=row[j]; j<n; j++) {
718           if (row[j] != rstart) break;
719         }
720         if (j < n) ncols = j-i;
721         else       ncols = n-i;
722         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
723         i    = j;
724       }
725     }
726     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
727 
728     baij->roworiented = r1;
729     a->roworiented    = r2;
730 
731     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
732   }
733 
734   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
735   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
736 
737   /* determine if any processor has disassembled, if so we must
738      also disassemble ourselfs, in order that we may reassemble. */
739   /*
740      if nonzero structure of submatrix B cannot change then we know that
741      no processor disassembled thus we can skip this stuff
742   */
743   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
744     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
745     if (mat->was_assembled && !other_disassembled) {
746       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
747     }
748   }
749 
750   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
751     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
752   }
753   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
754   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
755 
756   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
757 
758   baij->rowvalues = 0;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
762     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
769 #include <petscdraw.h>
770 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
771 {
772   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
773   PetscErrorCode    ierr;
774   PetscInt          bs   = mat->rmap->bs;
775   PetscMPIInt       rank = baij->rank;
776   PetscBool         iascii,isdraw;
777   PetscViewer       sviewer;
778   PetscViewerFormat format;
779 
780   PetscFunctionBegin;
781   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
782   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
783   if (iascii) {
784     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
785     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
786       MatInfo info;
787       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
788       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
789       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
790       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
791       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
792       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
793       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
794       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
795       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
796       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
797       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
798       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
799       PetscFunctionReturn(0);
800     } else if (format == PETSC_VIEWER_ASCII_INFO) {
801       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
802       PetscFunctionReturn(0);
803     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
804       PetscFunctionReturn(0);
805     }
806   }
807 
808   if (isdraw) {
809     PetscDraw draw;
810     PetscBool isnull;
811     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
812     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
813     if (isnull) PetscFunctionReturn(0);
814   }
815 
816   {
817     /* assemble the entire matrix onto first processor. */
818     Mat          A;
819     Mat_SeqSBAIJ *Aloc;
820     Mat_SeqBAIJ  *Bloc;
821     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
822     MatScalar    *a;
823     const char   *matname;
824 
825     /* Should this be the same type as mat? */
826     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
827     if (!rank) {
828       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
829     } else {
830       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
831     }
832     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
833     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
834     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
835     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
836 
837     /* copy over the A part */
838     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
839     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
840     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
841 
842     for (i=0; i<mbs; i++) {
843       rvals[0] = bs*(baij->rstartbs + i);
844       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
845       for (j=ai[i]; j<ai[i+1]; j++) {
846         col = (baij->cstartbs+aj[j])*bs;
847         for (k=0; k<bs; k++) {
848           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
849           col++;
850           a += bs;
851         }
852       }
853     }
854     /* copy over the B part */
855     Bloc = (Mat_SeqBAIJ*)baij->B->data;
856     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
857     for (i=0; i<mbs; i++) {
858 
859       rvals[0] = bs*(baij->rstartbs + i);
860       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
861       for (j=ai[i]; j<ai[i+1]; j++) {
862         col = baij->garray[aj[j]]*bs;
863         for (k=0; k<bs; k++) {
864           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
865           col++;
866           a += bs;
867         }
868       }
869     }
870     ierr = PetscFree(rvals);CHKERRQ(ierr);
871     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
872     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
873     /*
874        Everyone has to call to draw the matrix since the graphics waits are
875        synchronized across all processors that share the PetscDraw object
876     */
877     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
878     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
879     if (!rank) {
880       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
881       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
882     }
883     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
884     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
885     ierr = MatDestroy(&A);CHKERRQ(ierr);
886   }
887   PetscFunctionReturn(0);
888 }
889 
890 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
891 {
892   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
893   Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
894   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
895   PetscErrorCode ierr;
896   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
897   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
898   int            fd;
899   PetscScalar    *column_values;
900   FILE           *file;
901   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
902   PetscInt       message_count,flowcontrolcount;
903 
904   PetscFunctionBegin;
905   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
906   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
907   nz   = bs2*(A->nz + B->nz);
908   rlen = mat->rmap->n;
909   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
910   if (!rank) {
911     header[0] = MAT_FILE_CLASSID;
912     header[1] = mat->rmap->N;
913     header[2] = mat->cmap->N;
914 
915     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
916     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
917     /* get largest number of rows any processor has */
918     range = mat->rmap->range;
919     for (i=1; i<size; i++) {
920       rlen = PetscMax(rlen,range[i+1] - range[i]);
921     }
922   } else {
923     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
924   }
925 
926   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
927   /* compute lengths of each row  */
928   for (i=0; i<a->mbs; i++) {
929     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
930   }
931   /* store the row lengths to the file */
932   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
933   if (!rank) {
934     MPI_Status status;
935     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
936     rlen = (range[1] - range[0])/bs;
937     for (i=0; i<rlen; i++) {
938       for (j=0; j<bs; j++) {
939         row_lens[i*bs+j] = bs*crow_lens[i];
940       }
941     }
942     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
943     for (i=1; i<size; i++) {
944       rlen = (range[i+1] - range[i])/bs;
945       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
946       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
947       for (k=0; k<rlen; k++) {
948         for (j=0; j<bs; j++) {
949           row_lens[k*bs+j] = bs*crow_lens[k];
950         }
951       }
952       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
953     }
954     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
955     ierr = PetscFree(row_lens);CHKERRQ(ierr);
956   } else {
957     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
958     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
959     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
960   }
961   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
962 
963   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
964      information needed to make it for each row from a block row. This does require more communication but still not more than
965      the communication needed for the nonzero values  */
966   nzmax = nz; /*  space a largest processor needs */
967   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
968   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
969   cnt   = 0;
970   for (i=0; i<a->mbs; i++) {
971     pcnt = cnt;
972     for (j=B->i[i]; j<B->i[i+1]; j++) {
973       if ((col = garray[B->j[j]]) > cstart) break;
974       for (l=0; l<bs; l++) {
975         column_indices[cnt++] = bs*col+l;
976       }
977     }
978     for (k=A->i[i]; k<A->i[i+1]; k++) {
979       for (l=0; l<bs; l++) {
980         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
981       }
982     }
983     for (; j<B->i[i+1]; j++) {
984       for (l=0; l<bs; l++) {
985         column_indices[cnt++] = bs*garray[B->j[j]]+l;
986       }
987     }
988     len = cnt - pcnt;
989     for (k=1; k<bs; k++) {
990       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
991       cnt += len;
992     }
993   }
994   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
995 
996   /* store the columns to the file */
997   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
998   if (!rank) {
999     MPI_Status status;
1000     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1001     for (i=1; i<size; i++) {
1002       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1003       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1004       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1005       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1006     }
1007     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1008   } else {
1009     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1010     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1011     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1012     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the numerical values */
1017   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<a->mbs; i++) {
1020     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1021     for (j=B->i[i]; j<B->i[i+1]; j++) {
1022       if (garray[B->j[j]] > cstart) break;
1023       for (l=0; l<bs; l++) {
1024         for (ll=0; ll<bs; ll++) {
1025           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1026         }
1027       }
1028       cnt += bs;
1029     }
1030     for (k=A->i[i]; k<A->i[i+1]; k++) {
1031       for (l=0; l<bs; l++) {
1032         for (ll=0; ll<bs; ll++) {
1033           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1034         }
1035       }
1036       cnt += bs;
1037     }
1038     for (; j<B->i[i+1]; j++) {
1039       for (l=0; l<bs; l++) {
1040         for (ll=0; ll<bs; ll++) {
1041           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1042         }
1043       }
1044       cnt += bs;
1045     }
1046     cnt += (bs-1)*rlen;
1047   }
1048   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1049 
1050   /* store the column values to the file */
1051   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1052   if (!rank) {
1053     MPI_Status status;
1054     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1055     for (i=1; i<size; i++) {
1056       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1057       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1058       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1059       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1060     }
1061     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1062   } else {
1063     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1064     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1065     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1066     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1067   }
1068   ierr = PetscFree(column_values);CHKERRQ(ierr);
1069 
1070   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1071   if (file) {
1072     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1073   }
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1078 {
1079   PetscErrorCode ierr;
1080   PetscBool      iascii,isdraw,issocket,isbinary;
1081 
1082   PetscFunctionBegin;
1083   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1084   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1085   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1086   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1087   if (iascii || isdraw || issocket) {
1088     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1089   } else if (isbinary) {
1090     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1091   }
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1096 {
1097   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1098   PetscErrorCode ierr;
1099 
1100   PetscFunctionBegin;
1101 #if defined(PETSC_USE_LOG)
1102   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1103 #endif
1104   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1105   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1106   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1107   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1108 #if defined(PETSC_USE_CTABLE)
1109   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1110 #else
1111   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1112 #endif
1113   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1114   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1115   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1116   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1117   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1118   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1119   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1120   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1121   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1122   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1123   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1124   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1125   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1126   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1127   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1128 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1129   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1130 #endif
1131   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1132   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1133   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1134   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1135 
1136   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1137   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1140 #if defined(PETSC_HAVE_ELEMENTAL)
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1142 #endif
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1147 {
1148   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1149   PetscErrorCode    ierr;
1150   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1151   PetscScalar       *from;
1152   const PetscScalar *x;
1153 
1154   PetscFunctionBegin;
1155   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1156   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1157 
1158   /* diagonal part */
1159   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1160   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1161 
1162   /* subdiagonal part */
1163   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1164 
1165   /* copy x into the vec slvec0 */
1166   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1167   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1168 
1169   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1170   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1171   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1172 
1173   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1174   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1175   /* supperdiagonal part */
1176   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1181 {
1182   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1183   PetscErrorCode    ierr;
1184   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1185   PetscScalar       *from;
1186   const PetscScalar *x;
1187 
1188   PetscFunctionBegin;
1189   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1190   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1191 
1192   /* diagonal part */
1193   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1194   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1195 
1196   /* subdiagonal part */
1197   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1198 
1199   /* copy x into the vec slvec0 */
1200   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1201   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1202 
1203   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1204   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1205   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1206 
1207   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1208   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1209   /* supperdiagonal part */
1210   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1215 {
1216   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1217   PetscErrorCode ierr;
1218   PetscInt       nt;
1219 
1220   PetscFunctionBegin;
1221   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1222   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1223 
1224   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1225   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1226 
1227   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1228   /* do diagonal part */
1229   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1230   /* do supperdiagonal part */
1231   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1232   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1233   /* do subdiagonal part */
1234   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1235   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1236   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1241 {
1242   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1243   PetscErrorCode    ierr;
1244   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1245   PetscScalar       *from,zero=0.0;
1246   const PetscScalar *x;
1247 
1248   PetscFunctionBegin;
1249   /*
1250   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1251   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1252   */
1253   /* diagonal part */
1254   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1255   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1256 
1257   /* subdiagonal part */
1258   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1259 
1260   /* copy x into the vec slvec0 */
1261   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1262   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1263   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1264   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1265 
1266   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1268   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269 
1270   /* supperdiagonal part */
1271   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1276 {
1277   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1278   PetscErrorCode ierr;
1279 
1280   PetscFunctionBegin;
1281   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1282   /* do diagonal part */
1283   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1284   /* do supperdiagonal part */
1285   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1286   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1287 
1288   /* do subdiagonal part */
1289   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1291   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 /*
1296   This only works correctly for square matrices where the subblock A->A is the
1297    diagonal block
1298 */
1299 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1300 {
1301   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1302   PetscErrorCode ierr;
1303 
1304   PetscFunctionBegin;
1305   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1306   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1311 {
1312   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1313   PetscErrorCode ierr;
1314 
1315   PetscFunctionBegin;
1316   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1317   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1322 {
1323   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1324   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1325   PetscErrorCode ierr;
1326   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1327   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1328   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1329 
1330   PetscFunctionBegin;
1331   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1332   mat->getrowactive = PETSC_TRUE;
1333 
1334   if (!mat->rowvalues && (idx || v)) {
1335     /*
1336         allocate enough space to hold information from the longest row.
1337     */
1338     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1339     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1340     PetscInt     max = 1,mbs = mat->mbs,tmp;
1341     for (i=0; i<mbs; i++) {
1342       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1343       if (max < tmp) max = tmp;
1344     }
1345     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1346   }
1347 
1348   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1349   lrow = row - brstart;  /* local row index */
1350 
1351   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1352   if (!v)   {pvA = 0; pvB = 0;}
1353   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1354   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1355   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1356   nztot = nzA + nzB;
1357 
1358   cmap = mat->garray;
1359   if (v  || idx) {
1360     if (nztot) {
1361       /* Sort by increasing column numbers, assuming A and B already sorted */
1362       PetscInt imark = -1;
1363       if (v) {
1364         *v = v_p = mat->rowvalues;
1365         for (i=0; i<nzB; i++) {
1366           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1367           else break;
1368         }
1369         imark = i;
1370         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1371         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1372       }
1373       if (idx) {
1374         *idx = idx_p = mat->rowindices;
1375         if (imark > -1) {
1376           for (i=0; i<imark; i++) {
1377             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1378           }
1379         } else {
1380           for (i=0; i<nzB; i++) {
1381             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1382             else break;
1383           }
1384           imark = i;
1385         }
1386         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1387         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1388       }
1389     } else {
1390       if (idx) *idx = 0;
1391       if (v)   *v   = 0;
1392     }
1393   }
1394   *nz  = nztot;
1395   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1396   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1401 {
1402   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1403 
1404   PetscFunctionBegin;
1405   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1406   baij->getrowactive = PETSC_FALSE;
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1411 {
1412   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1413   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1414 
1415   PetscFunctionBegin;
1416   aA->getrow_utriangular = PETSC_TRUE;
1417   PetscFunctionReturn(0);
1418 }
1419 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1420 {
1421   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1422   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1423 
1424   PetscFunctionBegin;
1425   aA->getrow_utriangular = PETSC_FALSE;
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1430 {
1431   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1432   PetscErrorCode ierr;
1433 
1434   PetscFunctionBegin;
1435   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1436   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1441 {
1442   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1443   PetscErrorCode ierr;
1444 
1445   PetscFunctionBegin;
1446   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1447   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1452    Input: isrow       - distributed(parallel),
1453           iscol_local - locally owned (seq)
1454 */
1455 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1456 {
1457   PetscErrorCode ierr;
1458   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1459   const PetscInt *ptr1,*ptr2;
1460 
1461   PetscFunctionBegin;
1462   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1463   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1464   if (sz1 > sz2) {
1465     *flg = PETSC_FALSE;
1466     PetscFunctionReturn(0);
1467   }
1468 
1469   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1470   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1471 
1472   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1473   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1474   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1475   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1476   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1477   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1478 
1479   nmatch=0;
1480   k     = 0;
1481   for (i=0; i<sz1; i++){
1482     for (j=k; j<sz2; j++){
1483       if (a1[i] == a2[j]) {
1484         k = j; nmatch++;
1485         break;
1486       }
1487     }
1488   }
1489   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1490   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1491   ierr = PetscFree(a1);CHKERRQ(ierr);
1492   ierr = PetscFree(a2);CHKERRQ(ierr);
1493   if (nmatch < sz1) {
1494     *flg = PETSC_FALSE;
1495   } else {
1496     *flg = PETSC_TRUE;
1497   }
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1502 {
1503   PetscErrorCode ierr;
1504   IS             iscol_local;
1505   PetscInt       csize;
1506   PetscBool      isequal;
1507 
1508   PetscFunctionBegin;
1509   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1510   if (call == MAT_REUSE_MATRIX) {
1511     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1512     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1513   } else {
1514     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1515     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1516     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1517   }
1518 
1519   /* now call MatCreateSubMatrix_MPIBAIJ() */
1520   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1521   if (call == MAT_INITIAL_MATRIX) {
1522     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1523     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1524   }
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1529 {
1530   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1531   PetscErrorCode ierr;
1532 
1533   PetscFunctionBegin;
1534   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1535   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1540 {
1541   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1542   Mat            A  = a->A,B = a->B;
1543   PetscErrorCode ierr;
1544   PetscReal      isend[5],irecv[5];
1545 
1546   PetscFunctionBegin;
1547   info->block_size = (PetscReal)matin->rmap->bs;
1548 
1549   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1550 
1551   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1552   isend[3] = info->memory;  isend[4] = info->mallocs;
1553 
1554   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1555 
1556   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1557   isend[3] += info->memory;  isend[4] += info->mallocs;
1558   if (flag == MAT_LOCAL) {
1559     info->nz_used      = isend[0];
1560     info->nz_allocated = isend[1];
1561     info->nz_unneeded  = isend[2];
1562     info->memory       = isend[3];
1563     info->mallocs      = isend[4];
1564   } else if (flag == MAT_GLOBAL_MAX) {
1565     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1566 
1567     info->nz_used      = irecv[0];
1568     info->nz_allocated = irecv[1];
1569     info->nz_unneeded  = irecv[2];
1570     info->memory       = irecv[3];
1571     info->mallocs      = irecv[4];
1572   } else if (flag == MAT_GLOBAL_SUM) {
1573     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1574 
1575     info->nz_used      = irecv[0];
1576     info->nz_allocated = irecv[1];
1577     info->nz_unneeded  = irecv[2];
1578     info->memory       = irecv[3];
1579     info->mallocs      = irecv[4];
1580   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1581   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1582   info->fill_ratio_needed = 0;
1583   info->factor_mallocs    = 0;
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1588 {
1589   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1590   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1591   PetscErrorCode ierr;
1592 
1593   PetscFunctionBegin;
1594   switch (op) {
1595   case MAT_NEW_NONZERO_LOCATIONS:
1596   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1597   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1598   case MAT_KEEP_NONZERO_PATTERN:
1599   case MAT_SUBMAT_SINGLEIS:
1600   case MAT_NEW_NONZERO_LOCATION_ERR:
1601     MatCheckPreallocated(A,1);
1602     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1603     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1604     break;
1605   case MAT_ROW_ORIENTED:
1606     MatCheckPreallocated(A,1);
1607     a->roworiented = flg;
1608 
1609     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1610     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1611     break;
1612   case MAT_NEW_DIAGONALS:
1613     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1614     break;
1615   case MAT_IGNORE_OFF_PROC_ENTRIES:
1616     a->donotstash = flg;
1617     break;
1618   case MAT_USE_HASH_TABLE:
1619     a->ht_flag = flg;
1620     break;
1621   case MAT_HERMITIAN:
1622     MatCheckPreallocated(A,1);
1623     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1624     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1625 #if defined(PETSC_USE_COMPLEX)
1626     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1627 #endif
1628     break;
1629   case MAT_SPD:
1630     A->spd_set = PETSC_TRUE;
1631     A->spd     = flg;
1632     if (flg) {
1633       A->symmetric                  = PETSC_TRUE;
1634       A->structurally_symmetric     = PETSC_TRUE;
1635       A->symmetric_set              = PETSC_TRUE;
1636       A->structurally_symmetric_set = PETSC_TRUE;
1637     }
1638     break;
1639   case MAT_SYMMETRIC:
1640     MatCheckPreallocated(A,1);
1641     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1642     break;
1643   case MAT_STRUCTURALLY_SYMMETRIC:
1644     MatCheckPreallocated(A,1);
1645     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1646     break;
1647   case MAT_SYMMETRY_ETERNAL:
1648     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1649     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1650     break;
1651   case MAT_IGNORE_LOWER_TRIANGULAR:
1652     aA->ignore_ltriangular = flg;
1653     break;
1654   case MAT_ERROR_LOWER_TRIANGULAR:
1655     aA->ignore_ltriangular = flg;
1656     break;
1657   case MAT_GETROW_UPPERTRIANGULAR:
1658     aA->getrow_utriangular = flg;
1659     break;
1660   default:
1661     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1662   }
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1667 {
1668   PetscErrorCode ierr;
1669 
1670   PetscFunctionBegin;
1671   if (reuse == MAT_INITIAL_MATRIX) {
1672     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1673   }  else if (reuse == MAT_REUSE_MATRIX) {
1674     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1675   }
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1680 {
1681   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1682   Mat            a     = baij->A, b=baij->B;
1683   PetscErrorCode ierr;
1684   PetscInt       nv,m,n;
1685   PetscBool      flg;
1686 
1687   PetscFunctionBegin;
1688   if (ll != rr) {
1689     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1690     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1691   }
1692   if (!ll) PetscFunctionReturn(0);
1693 
1694   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1695   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1696 
1697   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1698   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1699 
1700   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1701 
1702   /* left diagonalscale the off-diagonal part */
1703   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1704 
1705   /* scale the diagonal part */
1706   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1707 
1708   /* right diagonalscale the off-diagonal part */
1709   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1710   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1715 {
1716   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1717   PetscErrorCode ierr;
1718 
1719   PetscFunctionBegin;
1720   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1725 
1726 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1727 {
1728   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1729   Mat            a,b,c,d;
1730   PetscBool      flg;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   a = matA->A; b = matA->B;
1735   c = matB->A; d = matB->B;
1736 
1737   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1738   if (flg) {
1739     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1740   }
1741   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1746 {
1747   PetscErrorCode ierr;
1748   PetscBool      isbaij;
1749 
1750   PetscFunctionBegin;
1751   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1752   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1753   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1754   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1755     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1756     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1757     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1758   } else {
1759     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1760     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1761 
1762     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1763     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1764   }
1765   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1770 {
1771   PetscErrorCode ierr;
1772 
1773   PetscFunctionBegin;
1774   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1779 {
1780   PetscErrorCode ierr;
1781   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1782   PetscBLASInt   bnz,one=1;
1783   Mat_SeqSBAIJ   *xa,*ya;
1784   Mat_SeqBAIJ    *xb,*yb;
1785 
1786   PetscFunctionBegin;
1787   if (str == SAME_NONZERO_PATTERN) {
1788     PetscScalar alpha = a;
1789     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1790     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1791     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1792     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1793     xb   = (Mat_SeqBAIJ*)xx->B->data;
1794     yb   = (Mat_SeqBAIJ*)yy->B->data;
1795     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1796     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1797     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1798   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1799     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1800     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1801     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1802   } else {
1803     Mat      B;
1804     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1805     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1806     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1807     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1808     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1809     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1810     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1811     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1812     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1813     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1814     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1815     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1816     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1817     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1818     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1819     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1820     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1821     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1822     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1823     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1824   }
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1829 {
1830   PetscErrorCode ierr;
1831   PetscInt       i;
1832   PetscBool      flg;
1833 
1834   PetscFunctionBegin;
1835   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1836   for (i=0; i<n; i++) {
1837     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1838     if (!flg) {
1839       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1840     }
1841   }
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1846 {
1847   PetscErrorCode ierr;
1848   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1849   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1850 
1851   PetscFunctionBegin;
1852   if (!Y->preallocated) {
1853     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1854   } else if (!aij->nz) {
1855     PetscInt nonew = aij->nonew;
1856     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1857     aij->nonew = nonew;
1858   }
1859   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1864 {
1865   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1866   PetscErrorCode ierr;
1867 
1868   PetscFunctionBegin;
1869   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1870   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1871   if (d) {
1872     PetscInt rstart;
1873     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1874     *d += rstart/A->rmap->bs;
1875 
1876   }
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1881 {
1882   PetscFunctionBegin;
1883   *a = ((Mat_MPISBAIJ*)A->data)->A;
1884   PetscFunctionReturn(0);
1885 }
1886 
1887 /* -------------------------------------------------------------------*/
1888 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1889                                        MatGetRow_MPISBAIJ,
1890                                        MatRestoreRow_MPISBAIJ,
1891                                        MatMult_MPISBAIJ,
1892                                /*  4*/ MatMultAdd_MPISBAIJ,
1893                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1894                                        MatMultAdd_MPISBAIJ,
1895                                        0,
1896                                        0,
1897                                        0,
1898                                /* 10*/ 0,
1899                                        0,
1900                                        0,
1901                                        MatSOR_MPISBAIJ,
1902                                        MatTranspose_MPISBAIJ,
1903                                /* 15*/ MatGetInfo_MPISBAIJ,
1904                                        MatEqual_MPISBAIJ,
1905                                        MatGetDiagonal_MPISBAIJ,
1906                                        MatDiagonalScale_MPISBAIJ,
1907                                        MatNorm_MPISBAIJ,
1908                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1909                                        MatAssemblyEnd_MPISBAIJ,
1910                                        MatSetOption_MPISBAIJ,
1911                                        MatZeroEntries_MPISBAIJ,
1912                                /* 24*/ 0,
1913                                        0,
1914                                        0,
1915                                        0,
1916                                        0,
1917                                /* 29*/ MatSetUp_MPISBAIJ,
1918                                        0,
1919                                        0,
1920                                        MatGetDiagonalBlock_MPISBAIJ,
1921                                        0,
1922                                /* 34*/ MatDuplicate_MPISBAIJ,
1923                                        0,
1924                                        0,
1925                                        0,
1926                                        0,
1927                                /* 39*/ MatAXPY_MPISBAIJ,
1928                                        MatCreateSubMatrices_MPISBAIJ,
1929                                        MatIncreaseOverlap_MPISBAIJ,
1930                                        MatGetValues_MPISBAIJ,
1931                                        MatCopy_MPISBAIJ,
1932                                /* 44*/ 0,
1933                                        MatScale_MPISBAIJ,
1934                                        MatShift_MPISBAIJ,
1935                                        0,
1936                                        0,
1937                                /* 49*/ 0,
1938                                        0,
1939                                        0,
1940                                        0,
1941                                        0,
1942                                /* 54*/ 0,
1943                                        0,
1944                                        MatSetUnfactored_MPISBAIJ,
1945                                        0,
1946                                        MatSetValuesBlocked_MPISBAIJ,
1947                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1948                                        0,
1949                                        0,
1950                                        0,
1951                                        0,
1952                                /* 64*/ 0,
1953                                        0,
1954                                        0,
1955                                        0,
1956                                        0,
1957                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1958                                        0,
1959                                        0,
1960                                        0,
1961                                        0,
1962                                /* 74*/ 0,
1963                                        0,
1964                                        0,
1965                                        0,
1966                                        0,
1967                                /* 79*/ 0,
1968                                        0,
1969                                        0,
1970                                        0,
1971                                        MatLoad_MPISBAIJ,
1972                                /* 84*/ 0,
1973                                        0,
1974                                        0,
1975                                        0,
1976                                        0,
1977                                /* 89*/ 0,
1978                                        0,
1979                                        0,
1980                                        0,
1981                                        0,
1982                                /* 94*/ 0,
1983                                        0,
1984                                        0,
1985                                        0,
1986                                        0,
1987                                /* 99*/ 0,
1988                                        0,
1989                                        0,
1990                                        0,
1991                                        0,
1992                                /*104*/ 0,
1993                                        MatRealPart_MPISBAIJ,
1994                                        MatImaginaryPart_MPISBAIJ,
1995                                        MatGetRowUpperTriangular_MPISBAIJ,
1996                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1997                                /*109*/ 0,
1998                                        0,
1999                                        0,
2000                                        0,
2001                                        MatMissingDiagonal_MPISBAIJ,
2002                                /*114*/ 0,
2003                                        0,
2004                                        0,
2005                                        0,
2006                                        0,
2007                                /*119*/ 0,
2008                                        0,
2009                                        0,
2010                                        0,
2011                                        0,
2012                                /*124*/ 0,
2013                                        0,
2014                                        0,
2015                                        0,
2016                                        0,
2017                                /*129*/ 0,
2018                                        0,
2019                                        0,
2020                                        0,
2021                                        0,
2022                                /*134*/ 0,
2023                                        0,
2024                                        0,
2025                                        0,
2026                                        0,
2027                                /*139*/ MatSetBlockSizes_Default,
2028                                        0,
2029                                        0,
2030                                        0,
2031                                        0,
2032                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2033 };
2034 
2035 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2036 {
2037   Mat_MPISBAIJ   *b;
2038   PetscErrorCode ierr;
2039   PetscInt       i,mbs,Mbs;
2040 
2041   PetscFunctionBegin;
2042   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2043   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2044   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2045   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2046 
2047   b   = (Mat_MPISBAIJ*)B->data;
2048   mbs = B->rmap->n/bs;
2049   Mbs = B->rmap->N/bs;
2050   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
2051 
2052   B->rmap->bs = bs;
2053   b->bs2      = bs*bs;
2054   b->mbs      = mbs;
2055   b->Mbs      = Mbs;
2056   b->nbs      = B->cmap->n/bs;
2057   b->Nbs      = B->cmap->N/bs;
2058 
2059   for (i=0; i<=b->size; i++) {
2060     b->rangebs[i] = B->rmap->range[i]/bs;
2061   }
2062   b->rstartbs = B->rmap->rstart/bs;
2063   b->rendbs   = B->rmap->rend/bs;
2064 
2065   b->cstartbs = B->cmap->rstart/bs;
2066   b->cendbs   = B->cmap->rend/bs;
2067 
2068 #if defined(PETSC_USE_CTABLE)
2069   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2070 #else
2071   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2072 #endif
2073   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2074   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2075   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2076   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2077   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2078   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2079   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2080   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2081   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2082 
2083   /* Because the B will have been resized we simply destroy it and create a new one each time */
2084   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2085   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2086   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2087   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2088   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2089 
2090   if (!B->preallocated) {
2091     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2092     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2093     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2094     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2095     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2096   }
2097 
2098   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2099   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2100 
2101   B->preallocated  = PETSC_TRUE;
2102   B->was_assembled = PETSC_FALSE;
2103   B->assembled     = PETSC_FALSE;
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2108 {
2109   PetscInt       m,rstart,cstart,cend;
2110   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2111   const PetscInt *JJ    =0;
2112   PetscScalar    *values=0;
2113   PetscErrorCode ierr;
2114 
2115   PetscFunctionBegin;
2116   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2117   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2118   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2119   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2120   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2121   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2122   m      = B->rmap->n/bs;
2123   rstart = B->rmap->rstart/bs;
2124   cstart = B->cmap->rstart/bs;
2125   cend   = B->cmap->rend/bs;
2126 
2127   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2128   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2129   for (i=0; i<m; i++) {
2130     nz = ii[i+1] - ii[i];
2131     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2132     nz_max = PetscMax(nz_max,nz);
2133     JJ     = jj + ii[i];
2134     for (j=0; j<nz; j++) {
2135       if (*JJ >= cstart) break;
2136       JJ++;
2137     }
2138     d = 0;
2139     for (; j<nz; j++) {
2140       if (*JJ++ >= cend) break;
2141       d++;
2142     }
2143     d_nnz[i] = d;
2144     o_nnz[i] = nz - d;
2145   }
2146   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2147   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2148 
2149   values = (PetscScalar*)V;
2150   if (!values) {
2151     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2152     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2153   }
2154   for (i=0; i<m; i++) {
2155     PetscInt          row    = i + rstart;
2156     PetscInt          ncols  = ii[i+1] - ii[i];
2157     const PetscInt    *icols = jj + ii[i];
2158     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2159     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2160   }
2161 
2162   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2163   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2164   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2165   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 /*MC
2170    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2171    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2172    the matrix is stored.
2173 
2174   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2175   can call MatSetOption(Mat, MAT_HERMITIAN);
2176 
2177    Options Database Keys:
2178 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2179 
2180   Level: beginner
2181 
2182 .seealso: MatCreateMPISBAIJ
2183 M*/
2184 
2185 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2186 {
2187   Mat_MPISBAIJ   *b;
2188   PetscErrorCode ierr;
2189   PetscBool      flg = PETSC_FALSE;
2190 
2191   PetscFunctionBegin;
2192   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2193   B->data = (void*)b;
2194   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2195 
2196   B->ops->destroy = MatDestroy_MPISBAIJ;
2197   B->ops->view    = MatView_MPISBAIJ;
2198   B->assembled    = PETSC_FALSE;
2199   B->insertmode   = NOT_SET_VALUES;
2200 
2201   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2202   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2203 
2204   /* build local table of row and column ownerships */
2205   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2206 
2207   /* build cache for off array entries formed */
2208   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2209 
2210   b->donotstash  = PETSC_FALSE;
2211   b->colmap      = NULL;
2212   b->garray      = NULL;
2213   b->roworiented = PETSC_TRUE;
2214 
2215   /* stuff used in block assembly */
2216   b->barray = 0;
2217 
2218   /* stuff used for matrix vector multiply */
2219   b->lvec    = 0;
2220   b->Mvctx   = 0;
2221   b->slvec0  = 0;
2222   b->slvec0b = 0;
2223   b->slvec1  = 0;
2224   b->slvec1a = 0;
2225   b->slvec1b = 0;
2226   b->sMvctx  = 0;
2227 
2228   /* stuff for MatGetRow() */
2229   b->rowindices   = 0;
2230   b->rowvalues    = 0;
2231   b->getrowactive = PETSC_FALSE;
2232 
2233   /* hash table stuff */
2234   b->ht           = 0;
2235   b->hd           = 0;
2236   b->ht_size      = 0;
2237   b->ht_flag      = PETSC_FALSE;
2238   b->ht_fact      = 0;
2239   b->ht_total_ct  = 0;
2240   b->ht_insert_ct = 0;
2241 
2242   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2243   b->ijonly = PETSC_FALSE;
2244 
2245   b->in_loc = 0;
2246   b->v_loc  = 0;
2247   b->n_loc  = 0;
2248 
2249   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2250   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2251   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2252   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2253 #if defined(PETSC_HAVE_ELEMENTAL)
2254   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2255 #endif
2256 
2257   B->symmetric                  = PETSC_TRUE;
2258   B->structurally_symmetric     = PETSC_TRUE;
2259   B->symmetric_set              = PETSC_TRUE;
2260   B->structurally_symmetric_set = PETSC_TRUE;
2261   B->symmetric_eternal          = PETSC_TRUE;
2262 
2263   B->hermitian                  = PETSC_FALSE;
2264   B->hermitian_set              = PETSC_FALSE;
2265 
2266   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2267   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2268   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2269   if (flg) {
2270     PetscReal fact = 1.39;
2271     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2272     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2273     if (fact <= 1.0) fact = 1.39;
2274     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2275     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2276   }
2277   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 /*MC
2282    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2283 
2284    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2285    and MATMPISBAIJ otherwise.
2286 
2287    Options Database Keys:
2288 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2289 
2290   Level: beginner
2291 
2292 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2293 M*/
2294 
2295 /*@C
2296    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2297    the user should preallocate the matrix storage by setting the parameters
2298    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2299    performance can be increased by more than a factor of 50.
2300 
2301    Collective on Mat
2302 
2303    Input Parameters:
2304 +  B - the matrix
2305 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2306           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2307 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2308            submatrix  (same for all local rows)
2309 .  d_nnz - array containing the number of block nonzeros in the various block rows
2310            in the upper triangular and diagonal part of the in diagonal portion of the local
2311            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2312            for the diagonal entry and set a value even if it is zero.
2313 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2314            submatrix (same for all local rows).
2315 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2316            off-diagonal portion of the local submatrix that is right of the diagonal
2317            (possibly different for each block row) or NULL.
2318 
2319 
2320    Options Database Keys:
2321 .   -mat_no_unroll - uses code that does not unroll the loops in the
2322                      block calculations (much slower)
2323 .   -mat_block_size - size of the blocks to use
2324 
2325    Notes:
2326 
2327    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2328    than it must be used on all processors that share the object for that argument.
2329 
2330    If the *_nnz parameter is given then the *_nz parameter is ignored
2331 
2332    Storage Information:
2333    For a square global matrix we define each processor's diagonal portion
2334    to be its local rows and the corresponding columns (a square submatrix);
2335    each processor's off-diagonal portion encompasses the remainder of the
2336    local matrix (a rectangular submatrix).
2337 
2338    The user can specify preallocated storage for the diagonal part of
2339    the local submatrix with either d_nz or d_nnz (not both).  Set
2340    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2341    memory allocation.  Likewise, specify preallocated storage for the
2342    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2343 
2344    You can call MatGetInfo() to get information on how effective the preallocation was;
2345    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2346    You can also run with the option -info and look for messages with the string
2347    malloc in them to see if additional memory allocation was needed.
2348 
2349    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2350    the figure below we depict these three local rows and all columns (0-11).
2351 
2352 .vb
2353            0 1 2 3 4 5 6 7 8 9 10 11
2354           --------------------------
2355    row 3  |. . . d d d o o o o  o  o
2356    row 4  |. . . d d d o o o o  o  o
2357    row 5  |. . . d d d o o o o  o  o
2358           --------------------------
2359 .ve
2360 
2361    Thus, any entries in the d locations are stored in the d (diagonal)
2362    submatrix, and any entries in the o locations are stored in the
2363    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2364    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2365 
2366    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2367    plus the diagonal part of the d matrix,
2368    and o_nz should indicate the number of block nonzeros per row in the o matrix
2369 
2370    In general, for PDE problems in which most nonzeros are near the diagonal,
2371    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2372    or you will get TERRIBLE performance; see the users' manual chapter on
2373    matrices.
2374 
2375    Level: intermediate
2376 
2377 .keywords: matrix, block, aij, compressed row, sparse, parallel
2378 
2379 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2380 @*/
2381 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2382 {
2383   PetscErrorCode ierr;
2384 
2385   PetscFunctionBegin;
2386   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2387   PetscValidType(B,1);
2388   PetscValidLogicalCollectiveInt(B,bs,2);
2389   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 /*@C
2394    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2395    (block compressed row).  For good matrix assembly performance
2396    the user should preallocate the matrix storage by setting the parameters
2397    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2398    performance can be increased by more than a factor of 50.
2399 
2400    Collective on MPI_Comm
2401 
2402    Input Parameters:
2403 +  comm - MPI communicator
2404 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2405           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2406 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2407            This value should be the same as the local size used in creating the
2408            y vector for the matrix-vector product y = Ax.
2409 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2410            This value should be the same as the local size used in creating the
2411            x vector for the matrix-vector product y = Ax.
2412 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2413 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2414 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2415            submatrix  (same for all local rows)
2416 .  d_nnz - array containing the number of block nonzeros in the various block rows
2417            in the upper triangular portion of the in diagonal portion of the local
2418            (possibly different for each block block row) or NULL.
2419            If you plan to factor the matrix you must leave room for the diagonal entry and
2420            set its value even if it is zero.
2421 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2422            submatrix (same for all local rows).
2423 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2424            off-diagonal portion of the local submatrix (possibly different for
2425            each block row) or NULL.
2426 
2427    Output Parameter:
2428 .  A - the matrix
2429 
2430    Options Database Keys:
2431 .   -mat_no_unroll - uses code that does not unroll the loops in the
2432                      block calculations (much slower)
2433 .   -mat_block_size - size of the blocks to use
2434 .   -mat_mpi - use the parallel matrix data structures even on one processor
2435                (defaults to using SeqBAIJ format on one processor)
2436 
2437    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2438    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2439    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2440 
2441    Notes:
2442    The number of rows and columns must be divisible by blocksize.
2443    This matrix type does not support complex Hermitian operation.
2444 
2445    The user MUST specify either the local or global matrix dimensions
2446    (possibly both).
2447 
2448    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2449    than it must be used on all processors that share the object for that argument.
2450 
2451    If the *_nnz parameter is given then the *_nz parameter is ignored
2452 
2453    Storage Information:
2454    For a square global matrix we define each processor's diagonal portion
2455    to be its local rows and the corresponding columns (a square submatrix);
2456    each processor's off-diagonal portion encompasses the remainder of the
2457    local matrix (a rectangular submatrix).
2458 
2459    The user can specify preallocated storage for the diagonal part of
2460    the local submatrix with either d_nz or d_nnz (not both).  Set
2461    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2462    memory allocation.  Likewise, specify preallocated storage for the
2463    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2464 
2465    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2466    the figure below we depict these three local rows and all columns (0-11).
2467 
2468 .vb
2469            0 1 2 3 4 5 6 7 8 9 10 11
2470           --------------------------
2471    row 3  |. . . d d d o o o o  o  o
2472    row 4  |. . . d d d o o o o  o  o
2473    row 5  |. . . d d d o o o o  o  o
2474           --------------------------
2475 .ve
2476 
2477    Thus, any entries in the d locations are stored in the d (diagonal)
2478    submatrix, and any entries in the o locations are stored in the
2479    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2480    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2481 
2482    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2483    plus the diagonal part of the d matrix,
2484    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2485    In general, for PDE problems in which most nonzeros are near the diagonal,
2486    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2487    or you will get TERRIBLE performance; see the users' manual chapter on
2488    matrices.
2489 
2490    Level: intermediate
2491 
2492 .keywords: matrix, block, aij, compressed row, sparse, parallel
2493 
2494 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2495 @*/
2496 
2497 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2498 {
2499   PetscErrorCode ierr;
2500   PetscMPIInt    size;
2501 
2502   PetscFunctionBegin;
2503   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2504   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2505   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2506   if (size > 1) {
2507     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2508     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2509   } else {
2510     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2511     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2512   }
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 
2517 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2518 {
2519   Mat            mat;
2520   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2521   PetscErrorCode ierr;
2522   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2523   PetscScalar    *array;
2524 
2525   PetscFunctionBegin;
2526   *newmat = 0;
2527 
2528   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2529   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2530   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2531   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2532   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2533 
2534   mat->factortype   = matin->factortype;
2535   mat->preallocated = PETSC_TRUE;
2536   mat->assembled    = PETSC_TRUE;
2537   mat->insertmode   = NOT_SET_VALUES;
2538 
2539   a      = (Mat_MPISBAIJ*)mat->data;
2540   a->bs2 = oldmat->bs2;
2541   a->mbs = oldmat->mbs;
2542   a->nbs = oldmat->nbs;
2543   a->Mbs = oldmat->Mbs;
2544   a->Nbs = oldmat->Nbs;
2545 
2546   a->size         = oldmat->size;
2547   a->rank         = oldmat->rank;
2548   a->donotstash   = oldmat->donotstash;
2549   a->roworiented  = oldmat->roworiented;
2550   a->rowindices   = 0;
2551   a->rowvalues    = 0;
2552   a->getrowactive = PETSC_FALSE;
2553   a->barray       = 0;
2554   a->rstartbs     = oldmat->rstartbs;
2555   a->rendbs       = oldmat->rendbs;
2556   a->cstartbs     = oldmat->cstartbs;
2557   a->cendbs       = oldmat->cendbs;
2558 
2559   /* hash table stuff */
2560   a->ht           = 0;
2561   a->hd           = 0;
2562   a->ht_size      = 0;
2563   a->ht_flag      = oldmat->ht_flag;
2564   a->ht_fact      = oldmat->ht_fact;
2565   a->ht_total_ct  = 0;
2566   a->ht_insert_ct = 0;
2567 
2568   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2569   if (oldmat->colmap) {
2570 #if defined(PETSC_USE_CTABLE)
2571     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2572 #else
2573     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2574     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2575     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2576 #endif
2577   } else a->colmap = 0;
2578 
2579   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2580     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2581     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2582     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2583   } else a->garray = 0;
2584 
2585   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2586   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2587   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2588   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2589   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2590 
2591   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2592   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2593   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2594   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2595 
2596   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2597   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2598   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2599   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2600   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2601   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2602   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2603   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2604   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2605   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2606   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2607   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2608   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2609 
2610   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2611   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2612   a->sMvctx = oldmat->sMvctx;
2613   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2614 
2615   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2616   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2617   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2618   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2619   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2620   *newmat = mat;
2621   PetscFunctionReturn(0);
2622 }
2623 
2624 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2625 {
2626   PetscErrorCode ierr;
2627   PetscInt       i,nz,j,rstart,rend;
2628   PetscScalar    *vals,*buf;
2629   MPI_Comm       comm;
2630   MPI_Status     status;
2631   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2632   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2633   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2634   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2635   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2636   PetscInt       dcount,kmax,k,nzcount,tmp;
2637   int            fd;
2638   PetscBool      isbinary;
2639 
2640   PetscFunctionBegin;
2641   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2642   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
2643 
2644   /* force binary viewer to load .info file if it has not yet done so */
2645   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2646   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2647   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2648   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2649   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2650   if (bs < 0) bs = 1;
2651 
2652   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2653   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2654   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2655   if (!rank) {
2656     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2657     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2658     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2659   }
2660 
2661   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2662   M    = header[1];
2663   N    = header[2];
2664 
2665   /* If global sizes are set, check if they are consistent with that given in the file */
2666   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2667   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2668 
2669   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2670 
2671   /*
2672      This code adds extra rows to make sure the number of rows is
2673      divisible by the blocksize
2674   */
2675   Mbs        = M/bs;
2676   extra_rows = bs - M + bs*(Mbs);
2677   if (extra_rows == bs) extra_rows = 0;
2678   else                  Mbs++;
2679   if (extra_rows &&!rank) {
2680     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2681   }
2682 
2683   /* determine ownership of all rows */
2684   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2685     mbs = Mbs/size + ((Mbs % size) > rank);
2686     m   = mbs*bs;
2687   } else { /* User Set */
2688     m   = newmat->rmap->n;
2689     mbs = m/bs;
2690   }
2691   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2692   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2693   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2694   rowners[0] = 0;
2695   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2696   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2697   rstart = rowners[rank];
2698   rend   = rowners[rank+1];
2699 
2700   /* distribute row lengths to all processors */
2701   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2702   if (!rank) {
2703     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2704     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2705     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2706     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2707     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2708     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2709     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2710   } else {
2711     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2712   }
2713 
2714   if (!rank) {   /* procs[0] */
2715     /* calculate the number of nonzeros on each processor */
2716     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2717     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2718     for (i=0; i<size; i++) {
2719       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2720         procsnz[i] += rowlengths[j];
2721       }
2722     }
2723     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2724 
2725     /* determine max buffer needed and allocate it */
2726     maxnz = 0;
2727     for (i=0; i<size; i++) {
2728       maxnz = PetscMax(maxnz,procsnz[i]);
2729     }
2730     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2731 
2732     /* read in my part of the matrix column indices  */
2733     nz     = procsnz[0];
2734     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2735     mycols = ibuf;
2736     if (size == 1) nz -= extra_rows;
2737     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2738     if (size == 1) {
2739       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2740     }
2741 
2742     /* read in every ones (except the last) and ship off */
2743     for (i=1; i<size-1; i++) {
2744       nz   = procsnz[i];
2745       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2746       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2747     }
2748     /* read in the stuff for the last proc */
2749     if (size != 1) {
2750       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2751       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2752       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2753       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2754     }
2755     ierr = PetscFree(cols);CHKERRQ(ierr);
2756   } else {  /* procs[i], i>0 */
2757     /* determine buffer space needed for message */
2758     nz = 0;
2759     for (i=0; i<m; i++) nz += locrowlens[i];
2760     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2761     mycols = ibuf;
2762     /* receive message of column indices*/
2763     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2764     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2765     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2766   }
2767 
2768   /* loop over local rows, determining number of off diagonal entries */
2769   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2770   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2771   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2772   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2773   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2774   rowcount = 0;
2775   nzcount  = 0;
2776   for (i=0; i<mbs; i++) {
2777     dcount  = 0;
2778     odcount = 0;
2779     for (j=0; j<bs; j++) {
2780       kmax = locrowlens[rowcount];
2781       for (k=0; k<kmax; k++) {
2782         tmp = mycols[nzcount++]/bs; /* block col. index */
2783         if (!mask[tmp]) {
2784           mask[tmp] = 1;
2785           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2786           else masked1[dcount++] = tmp; /* entry in diag portion */
2787         }
2788       }
2789       rowcount++;
2790     }
2791 
2792     dlens[i]  = dcount;  /* d_nzz[i] */
2793     odlens[i] = odcount; /* o_nzz[i] */
2794 
2795     /* zero out the mask elements we set */
2796     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2797     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2798   }
2799   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2800   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2801   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2802 
2803   if (!rank) {
2804     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2805     /* read in my part of the matrix numerical values  */
2806     nz     = procsnz[0];
2807     vals   = buf;
2808     mycols = ibuf;
2809     if (size == 1) nz -= extra_rows;
2810     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2811     if (size == 1) {
2812       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2813     }
2814 
2815     /* insert into matrix */
2816     jj = rstart*bs;
2817     for (i=0; i<m; i++) {
2818       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2819       mycols += locrowlens[i];
2820       vals   += locrowlens[i];
2821       jj++;
2822     }
2823 
2824     /* read in other processors (except the last one) and ship out */
2825     for (i=1; i<size-1; i++) {
2826       nz   = procsnz[i];
2827       vals = buf;
2828       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2829       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2830     }
2831     /* the last proc */
2832     if (size != 1) {
2833       nz   = procsnz[i] - extra_rows;
2834       vals = buf;
2835       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2836       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2837       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2838     }
2839     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2840 
2841   } else {
2842     /* receive numeric values */
2843     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2844 
2845     /* receive message of values*/
2846     vals   = buf;
2847     mycols = ibuf;
2848     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2849     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2850     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2851 
2852     /* insert into matrix */
2853     jj = rstart*bs;
2854     for (i=0; i<m; i++) {
2855       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2856       mycols += locrowlens[i];
2857       vals   += locrowlens[i];
2858       jj++;
2859     }
2860   }
2861 
2862   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2863   ierr = PetscFree(buf);CHKERRQ(ierr);
2864   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2865   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2866   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2867   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2868   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2869   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2870   PetscFunctionReturn(0);
2871 }
2872 
2873 /*XXXXX@
2874    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2875 
2876    Input Parameters:
2877 .  mat  - the matrix
2878 .  fact - factor
2879 
2880    Not Collective on Mat, each process can have a different hash factor
2881 
2882    Level: advanced
2883 
2884   Notes:
2885    This can also be set by the command line option: -mat_use_hash_table fact
2886 
2887 .keywords: matrix, hashtable, factor, HT
2888 
2889 .seealso: MatSetOption()
2890 @XXXXX*/
2891 
2892 
2893 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2894 {
2895   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2896   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2897   PetscReal      atmp;
2898   PetscReal      *work,*svalues,*rvalues;
2899   PetscErrorCode ierr;
2900   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2901   PetscMPIInt    rank,size;
2902   PetscInt       *rowners_bs,dest,count,source;
2903   PetscScalar    *va;
2904   MatScalar      *ba;
2905   MPI_Status     stat;
2906 
2907   PetscFunctionBegin;
2908   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2909   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2910   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2911 
2912   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2913   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2914 
2915   bs  = A->rmap->bs;
2916   mbs = a->mbs;
2917   Mbs = a->Mbs;
2918   ba  = b->a;
2919   bi  = b->i;
2920   bj  = b->j;
2921 
2922   /* find ownerships */
2923   rowners_bs = A->rmap->range;
2924 
2925   /* each proc creates an array to be distributed */
2926   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2927   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2928 
2929   /* row_max for B */
2930   if (rank != size-1) {
2931     for (i=0; i<mbs; i++) {
2932       ncols = bi[1] - bi[0]; bi++;
2933       brow  = bs*i;
2934       for (j=0; j<ncols; j++) {
2935         bcol = bs*(*bj);
2936         for (kcol=0; kcol<bs; kcol++) {
2937           col  = bcol + kcol;                /* local col index */
2938           col += rowners_bs[rank+1];      /* global col index */
2939           for (krow=0; krow<bs; krow++) {
2940             atmp = PetscAbsScalar(*ba); ba++;
2941             row  = brow + krow;   /* local row index */
2942             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2943             if (work[col] < atmp) work[col] = atmp;
2944           }
2945         }
2946         bj++;
2947       }
2948     }
2949 
2950     /* send values to its owners */
2951     for (dest=rank+1; dest<size; dest++) {
2952       svalues = work + rowners_bs[dest];
2953       count   = rowners_bs[dest+1]-rowners_bs[dest];
2954       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2955     }
2956   }
2957 
2958   /* receive values */
2959   if (rank) {
2960     rvalues = work;
2961     count   = rowners_bs[rank+1]-rowners_bs[rank];
2962     for (source=0; source<rank; source++) {
2963       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2964       /* process values */
2965       for (i=0; i<count; i++) {
2966         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2967       }
2968     }
2969   }
2970 
2971   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2972   ierr = PetscFree(work);CHKERRQ(ierr);
2973   PetscFunctionReturn(0);
2974 }
2975 
2976 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2977 {
2978   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2979   PetscErrorCode    ierr;
2980   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2981   PetscScalar       *x,*ptr,*from;
2982   Vec               bb1;
2983   const PetscScalar *b;
2984 
2985   PetscFunctionBegin;
2986   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2987   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2988 
2989   if (flag == SOR_APPLY_UPPER) {
2990     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2991     PetscFunctionReturn(0);
2992   }
2993 
2994   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2995     if (flag & SOR_ZERO_INITIAL_GUESS) {
2996       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2997       its--;
2998     }
2999 
3000     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3001     while (its--) {
3002 
3003       /* lower triangular part: slvec0b = - B^T*xx */
3004       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3005 
3006       /* copy xx into slvec0a */
3007       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3008       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3009       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3010       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3011 
3012       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
3013 
3014       /* copy bb into slvec1a */
3015       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3016       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3017       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3018       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3019 
3020       /* set slvec1b = 0 */
3021       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3022 
3023       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3024       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3025       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3026       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3027 
3028       /* upper triangular part: bb1 = bb1 - B*x */
3029       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
3030 
3031       /* local diagonal sweep */
3032       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3033     }
3034     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3035   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3036     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3037   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3038     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3039   } else if (flag & SOR_EISENSTAT) {
3040     Vec               xx1;
3041     PetscBool         hasop;
3042     const PetscScalar *diag;
3043     PetscScalar       *sl,scale = (omega - 2.0)/omega;
3044     PetscInt          i,n;
3045 
3046     if (!mat->xx1) {
3047       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
3048       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
3049     }
3050     xx1 = mat->xx1;
3051     bb1 = mat->bb1;
3052 
3053     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
3054 
3055     if (!mat->diag) {
3056       /* this is wrong for same matrix with new nonzero values */
3057       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
3058       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
3059     }
3060     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
3061 
3062     if (hasop) {
3063       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
3064       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3065     } else {
3066       /*
3067           These two lines are replaced by code that may be a bit faster for a good compiler
3068       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
3069       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3070       */
3071       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3072       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3073       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3074       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3075       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
3076       if (omega == 1.0) {
3077         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
3078         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
3079       } else {
3080         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
3081         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
3082       }
3083       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3084       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3085       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3086       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3087     }
3088 
3089     /* multiply off-diagonal portion of matrix */
3090     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3091     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3092     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
3093     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3094     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3095     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
3096     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3097     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3098     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3099     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
3100 
3101     /* local sweep */
3102     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
3103     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
3104   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3105   PetscFunctionReturn(0);
3106 }
3107 
3108 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3109 {
3110   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
3111   PetscErrorCode ierr;
3112   Vec            lvec1,bb1;
3113 
3114   PetscFunctionBegin;
3115   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3116   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3117 
3118   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3119     if (flag & SOR_ZERO_INITIAL_GUESS) {
3120       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3121       its--;
3122     }
3123 
3124     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
3125     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3126     while (its--) {
3127       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3128 
3129       /* lower diagonal part: bb1 = bb - B^T*xx */
3130       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3131       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3132 
3133       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3134       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3135       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3136 
3137       /* upper diagonal part: bb1 = bb1 - B*x */
3138       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3139       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3140 
3141       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3142 
3143       /* diagonal sweep */
3144       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3145     }
3146     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3147     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3148   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3149   PetscFunctionReturn(0);
3150 }
3151 
3152 /*@
3153      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
3154          CSR format the local rows.
3155 
3156    Collective on MPI_Comm
3157 
3158    Input Parameters:
3159 +  comm - MPI communicator
3160 .  bs - the block size, only a block size of 1 is supported
3161 .  m - number of local rows (Cannot be PETSC_DECIDE)
3162 .  n - This value should be the same as the local size used in creating the
3163        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3164        calculated if N is given) For square matrices n is almost always m.
3165 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3166 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3167 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
3168 .   j - column indices
3169 -   a - matrix values
3170 
3171    Output Parameter:
3172 .   mat - the matrix
3173 
3174    Level: intermediate
3175 
3176    Notes:
3177        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3178      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3179      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3180 
3181        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3182 
3183 .keywords: matrix, aij, compressed row, sparse, parallel
3184 
3185 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3186           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3187 @*/
3188 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3189 {
3190   PetscErrorCode ierr;
3191 
3192 
3193   PetscFunctionBegin;
3194   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3195   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3196   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3197   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3198   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3199   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3200   PetscFunctionReturn(0);
3201 }
3202 
3203 
3204 /*@C
3205    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
3206    (the default parallel PETSc format).
3207 
3208    Collective on MPI_Comm
3209 
3210    Input Parameters:
3211 +  B - the matrix
3212 .  bs - the block size
3213 .  i - the indices into j for the start of each local row (starts with zero)
3214 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3215 -  v - optional values in the matrix
3216 
3217    Level: developer
3218 
3219 .keywords: matrix, aij, compressed row, sparse, parallel
3220 
3221 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3222 @*/
3223 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3224 {
3225   PetscErrorCode ierr;
3226 
3227   PetscFunctionBegin;
3228   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3229   PetscFunctionReturn(0);
3230 }
3231 
3232 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3233 {
3234   PetscErrorCode ierr;
3235   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3236   PetscInt       *indx;
3237   PetscScalar    *values;
3238 
3239   PetscFunctionBegin;
3240   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3241   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3242     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3243     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3244     PetscInt       *bindx,rmax=a->rmax,j;
3245 
3246     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3247     mbs = m/bs; Nbs = N/cbs;
3248     if (n == PETSC_DECIDE) {
3249       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3250     }
3251     /* Check sum(n) = Nbs */
3252     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3253     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
3254 
3255     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3256     rstart -= mbs;
3257 
3258     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3259     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3260     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3261     for (i=0; i<mbs; i++) {
3262       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3263       nnz = nnz/bs;
3264       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3265       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3266       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3267     }
3268     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3269     ierr = PetscFree(bindx);CHKERRQ(ierr);
3270 
3271     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3272     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3273     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3274     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3275     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3276     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3277   }
3278 
3279   /* numeric phase */
3280   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3281   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3282 
3283   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3284   for (i=0; i<m; i++) {
3285     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3286     Ii   = i + rstart;
3287     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3288     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3289   }
3290   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3291   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3292   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3293   PetscFunctionReturn(0);
3294 }
3295