xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision a323099b8f0a49dddc3d94ba96ffefcb83195128)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
11 {
12   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
17   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
18   PetscFunctionReturn(0);
19 }
20 
21 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
22 {
23   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
28   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
29   PetscFunctionReturn(0);
30 }
31 
32 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
33   { \
34  \
35     brow = row/bs;  \
36     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
37     rmax = aimax[brow]; nrow = ailen[brow]; \
38     bcol = col/bs; \
39     ridx = row % bs; cidx = col % bs; \
40     low  = 0; high = nrow; \
41     while (high-low > 3) { \
42       t = (low+high)/2; \
43       if (rp[t] > bcol) high = t; \
44       else              low  = t; \
45     } \
46     for (_i=low; _i<high; _i++) { \
47       if (rp[_i] > bcol) break; \
48       if (rp[_i] == bcol) { \
49         bap = ap + bs2*_i + bs*cidx + ridx; \
50         if (addv == ADD_VALUES) *bap += value;  \
51         else                    *bap  = value;  \
52         goto a_noinsert; \
53       } \
54     } \
55     if (a->nonew == 1) goto a_noinsert; \
56     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
57     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
58     N = nrow++ - 1;  \
59     /* shift up all the later entries in this row */ \
60     for (ii=N; ii>=_i; ii--) { \
61       rp[ii+1] = rp[ii]; \
62       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
63     } \
64     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
65     rp[_i]                      = bcol;  \
66     ap[bs2*_i + bs*cidx + ridx] = value;  \
67     A->nonzerostate++;\
68 a_noinsert:; \
69     ailen[brow] = nrow; \
70   }
71 
72 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
73   { \
74     brow = row/bs;  \
75     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
76     rmax = bimax[brow]; nrow = bilen[brow]; \
77     bcol = col/bs; \
78     ridx = row % bs; cidx = col % bs; \
79     low  = 0; high = nrow; \
80     while (high-low > 3) { \
81       t = (low+high)/2; \
82       if (rp[t] > bcol) high = t; \
83       else              low  = t; \
84     } \
85     for (_i=low; _i<high; _i++) { \
86       if (rp[_i] > bcol) break; \
87       if (rp[_i] == bcol) { \
88         bap = ap + bs2*_i + bs*cidx + ridx; \
89         if (addv == ADD_VALUES) *bap += value;  \
90         else                    *bap  = value;  \
91         goto b_noinsert; \
92       } \
93     } \
94     if (b->nonew == 1) goto b_noinsert; \
95     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
96     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
97     N = nrow++ - 1;  \
98     /* shift up all the later entries in this row */ \
99     for (ii=N; ii>=_i; ii--) { \
100       rp[ii+1] = rp[ii]; \
101       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
102     } \
103     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
104     rp[_i]                      = bcol;  \
105     ap[bs2*_i + bs*cidx + ridx] = value;  \
106     B->nonzerostate++;\
107 b_noinsert:; \
108     bilen[brow] = nrow; \
109   }
110 
111 /* Only add/insert a(i,j) with i<=j (blocks).
112    Any a(i,j) with i>j input by user is ingored.
113 */
114 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
115 {
116   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
117   MatScalar      value;
118   PetscBool      roworiented = baij->roworiented;
119   PetscErrorCode ierr;
120   PetscInt       i,j,row,col;
121   PetscInt       rstart_orig=mat->rmap->rstart;
122   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
123   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
124 
125   /* Some Variables required in the macro */
126   Mat          A     = baij->A;
127   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
128   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
129   MatScalar    *aa   =a->a;
130 
131   Mat         B     = baij->B;
132   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
133   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
134   MatScalar   *ba   =b->a;
135 
136   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
137   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
138   MatScalar *ap,*bap;
139 
140   /* for stash */
141   PetscInt  n_loc, *in_loc = NULL;
142   MatScalar *v_loc = NULL;
143 
144   PetscFunctionBegin;
145   if (!baij->donotstash) {
146     if (n > baij->n_loc) {
147       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
148       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
149       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
150       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
151 
152       baij->n_loc = n;
153     }
154     in_loc = baij->in_loc;
155     v_loc  = baij->v_loc;
156   }
157 
158   for (i=0; i<m; i++) {
159     if (im[i] < 0) continue;
160 #if defined(PETSC_USE_DEBUG)
161     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
162 #endif
163     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
164       row = im[i] - rstart_orig;              /* local row index */
165       for (j=0; j<n; j++) {
166         if (im[i]/bs > in[j]/bs) {
167           if (a->ignore_ltriangular) {
168             continue;    /* ignore lower triangular blocks */
169           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
170         }
171         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
172           col  = in[j] - cstart_orig;         /* local col index */
173           brow = row/bs; bcol = col/bs;
174           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
175           if (roworiented) value = v[i*n+j];
176           else             value = v[i+j*m];
177           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
178           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
179         } else if (in[j] < 0) continue;
180 #if defined(PETSC_USE_DEBUG)
181         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
182 #endif
183         else {  /* off-diag entry (B) */
184           if (mat->was_assembled) {
185             if (!baij->colmap) {
186               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
187             }
188 #if defined(PETSC_USE_CTABLE)
189             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
190             col  = col - 1;
191 #else
192             col = baij->colmap[in[j]/bs] - 1;
193 #endif
194             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
195               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
196               col  =  in[j];
197               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
198               B    = baij->B;
199               b    = (Mat_SeqBAIJ*)(B)->data;
200               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
201               ba   = b->a;
202             } else col += in[j]%bs;
203           } else col = in[j];
204           if (roworiented) value = v[i*n+j];
205           else             value = v[i+j*m];
206           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
207           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
208         }
209       }
210     } else {  /* off processor entry */
211       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
212       if (!baij->donotstash) {
213         mat->assembled = PETSC_FALSE;
214         n_loc          = 0;
215         for (j=0; j<n; j++) {
216           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
217           in_loc[n_loc] = in[j];
218           if (roworiented) {
219             v_loc[n_loc] = v[i*n+j];
220           } else {
221             v_loc[n_loc] = v[j*m+i];
222           }
223           n_loc++;
224         }
225         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
226       }
227     }
228   }
229   PetscFunctionReturn(0);
230 }
231 
232 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
233 {
234   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
235   PetscErrorCode    ierr;
236   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
237   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
238   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
239   PetscBool         roworiented=a->roworiented;
240   const PetscScalar *value     = v;
241   MatScalar         *ap,*aa = a->a,*bap;
242 
243   PetscFunctionBegin;
244   if (col < row) {
245     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
246     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
247   }
248   rp   = aj + ai[row];
249   ap   = aa + bs2*ai[row];
250   rmax = imax[row];
251   nrow = ailen[row];
252   value = v;
253   low   = 0;
254   high  = nrow;
255 
256   while (high-low > 7) {
257     t = (low+high)/2;
258     if (rp[t] > col) high = t;
259     else             low  = t;
260   }
261   for (i=low; i<high; i++) {
262     if (rp[i] > col) break;
263     if (rp[i] == col) {
264       bap = ap +  bs2*i;
265       if (roworiented) {
266         if (is == ADD_VALUES) {
267           for (ii=0; ii<bs; ii++) {
268             for (jj=ii; jj<bs2; jj+=bs) {
269               bap[jj] += *value++;
270             }
271           }
272         } else {
273           for (ii=0; ii<bs; ii++) {
274             for (jj=ii; jj<bs2; jj+=bs) {
275               bap[jj] = *value++;
276             }
277           }
278         }
279       } else {
280         if (is == ADD_VALUES) {
281           for (ii=0; ii<bs; ii++) {
282             for (jj=0; jj<bs; jj++) {
283               *bap++ += *value++;
284             }
285           }
286         } else {
287           for (ii=0; ii<bs; ii++) {
288             for (jj=0; jj<bs; jj++) {
289               *bap++  = *value++;
290             }
291           }
292         }
293       }
294       goto noinsert2;
295     }
296   }
297   if (nonew == 1) goto noinsert2;
298   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
299   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
300   N = nrow++ - 1; high++;
301   /* shift up all the later entries in this row */
302   for (ii=N; ii>=i; ii--) {
303     rp[ii+1] = rp[ii];
304     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
305   }
306   if (N >= i) {
307     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
308   }
309   rp[i] = col;
310   bap   = ap +  bs2*i;
311   if (roworiented) {
312     for (ii=0; ii<bs; ii++) {
313       for (jj=ii; jj<bs2; jj+=bs) {
314         bap[jj] = *value++;
315       }
316     }
317   } else {
318     for (ii=0; ii<bs; ii++) {
319       for (jj=0; jj<bs; jj++) {
320         *bap++ = *value++;
321       }
322     }
323   }
324   noinsert2:;
325   ailen[row] = nrow;
326   PetscFunctionReturn(0);
327 }
328 
329 /*
330    This routine is exactly duplicated in mpibaij.c
331 */
332 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
333 {
334   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
335   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
336   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
337   PetscErrorCode    ierr;
338   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
339   PetscBool         roworiented=a->roworiented;
340   const PetscScalar *value     = v;
341   MatScalar         *ap,*aa = a->a,*bap;
342 
343   PetscFunctionBegin;
344   rp   = aj + ai[row];
345   ap   = aa + bs2*ai[row];
346   rmax = imax[row];
347   nrow = ailen[row];
348   low  = 0;
349   high = nrow;
350   value = v;
351   while (high-low > 7) {
352     t = (low+high)/2;
353     if (rp[t] > col) high = t;
354     else             low  = t;
355   }
356   for (i=low; i<high; i++) {
357     if (rp[i] > col) break;
358     if (rp[i] == col) {
359       bap = ap +  bs2*i;
360       if (roworiented) {
361         if (is == ADD_VALUES) {
362           for (ii=0; ii<bs; ii++) {
363             for (jj=ii; jj<bs2; jj+=bs) {
364               bap[jj] += *value++;
365             }
366           }
367         } else {
368           for (ii=0; ii<bs; ii++) {
369             for (jj=ii; jj<bs2; jj+=bs) {
370               bap[jj] = *value++;
371             }
372           }
373         }
374       } else {
375         if (is == ADD_VALUES) {
376           for (ii=0; ii<bs; ii++,value+=bs) {
377             for (jj=0; jj<bs; jj++) {
378               bap[jj] += value[jj];
379             }
380             bap += bs;
381           }
382         } else {
383           for (ii=0; ii<bs; ii++,value+=bs) {
384             for (jj=0; jj<bs; jj++) {
385               bap[jj]  = value[jj];
386             }
387             bap += bs;
388           }
389         }
390       }
391       goto noinsert2;
392     }
393   }
394   if (nonew == 1) goto noinsert2;
395   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
396   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
397   N = nrow++ - 1; high++;
398   /* shift up all the later entries in this row */
399   for (ii=N; ii>=i; ii--) {
400     rp[ii+1] = rp[ii];
401     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
402   }
403   if (N >= i) {
404     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
405   }
406   rp[i] = col;
407   bap   = ap +  bs2*i;
408   if (roworiented) {
409     for (ii=0; ii<bs; ii++) {
410       for (jj=ii; jj<bs2; jj+=bs) {
411         bap[jj] = *value++;
412       }
413     }
414   } else {
415     for (ii=0; ii<bs; ii++) {
416       for (jj=0; jj<bs; jj++) {
417         *bap++ = *value++;
418       }
419     }
420   }
421   noinsert2:;
422   ailen[row] = nrow;
423   PetscFunctionReturn(0);
424 }
425 
426 /*
427     This routine could be optimized by removing the need for the block copy below and passing stride information
428   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
429 */
430 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
431 {
432   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
433   const MatScalar *value;
434   MatScalar       *barray     =baij->barray;
435   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
436   PetscErrorCode  ierr;
437   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
438   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
439   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
440 
441   PetscFunctionBegin;
442   if (!barray) {
443     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
444     baij->barray = barray;
445   }
446 
447   if (roworiented) {
448     stepval = (n-1)*bs;
449   } else {
450     stepval = (m-1)*bs;
451   }
452   for (i=0; i<m; i++) {
453     if (im[i] < 0) continue;
454 #if defined(PETSC_USE_DEBUG)
455     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
456 #endif
457     if (im[i] >= rstart && im[i] < rend) {
458       row = im[i] - rstart;
459       for (j=0; j<n; j++) {
460         if (im[i] > in[j]) {
461           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
462           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
463         }
464         /* If NumCol = 1 then a copy is not required */
465         if ((roworiented) && (n == 1)) {
466           barray = (MatScalar*) v + i*bs2;
467         } else if ((!roworiented) && (m == 1)) {
468           barray = (MatScalar*) v + j*bs2;
469         } else { /* Here a copy is required */
470           if (roworiented) {
471             value = v + i*(stepval+bs)*bs + j*bs;
472           } else {
473             value = v + j*(stepval+bs)*bs + i*bs;
474           }
475           for (ii=0; ii<bs; ii++,value+=stepval) {
476             for (jj=0; jj<bs; jj++) {
477               *barray++ = *value++;
478             }
479           }
480           barray -=bs2;
481         }
482 
483         if (in[j] >= cstart && in[j] < cend) {
484           col  = in[j] - cstart;
485           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
486         } else if (in[j] < 0) continue;
487 #if defined(PETSC_USE_DEBUG)
488         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
489 #endif
490         else {
491           if (mat->was_assembled) {
492             if (!baij->colmap) {
493               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
494             }
495 
496 #if defined(PETSC_USE_DEBUG)
497 #if defined(PETSC_USE_CTABLE)
498             { PetscInt data;
499               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
500               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
501             }
502 #else
503             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
504 #endif
505 #endif
506 #if defined(PETSC_USE_CTABLE)
507             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
508             col  = (col - 1)/bs;
509 #else
510             col = (baij->colmap[in[j]] - 1)/bs;
511 #endif
512             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
513               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
514               col  = in[j];
515             }
516           } else col = in[j];
517           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
518         }
519       }
520     } else {
521       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
522       if (!baij->donotstash) {
523         if (roworiented) {
524           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
525         } else {
526           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
527         }
528       }
529     }
530   }
531   PetscFunctionReturn(0);
532 }
533 
534 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
535 {
536   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
537   PetscErrorCode ierr;
538   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
539   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
540 
541   PetscFunctionBegin;
542   for (i=0; i<m; i++) {
543     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
544     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
545     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
546       row = idxm[i] - bsrstart;
547       for (j=0; j<n; j++) {
548         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
549         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
550         if (idxn[j] >= bscstart && idxn[j] < bscend) {
551           col  = idxn[j] - bscstart;
552           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
553         } else {
554           if (!baij->colmap) {
555             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
556           }
557 #if defined(PETSC_USE_CTABLE)
558           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
559           data--;
560 #else
561           data = baij->colmap[idxn[j]/bs]-1;
562 #endif
563           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
564           else {
565             col  = data + idxn[j]%bs;
566             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567           }
568         }
569       }
570     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
571   }
572   PetscFunctionReturn(0);
573 }
574 
575 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
576 {
577   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
578   PetscErrorCode ierr;
579   PetscReal      sum[2],*lnorm2;
580 
581   PetscFunctionBegin;
582   if (baij->size == 1) {
583     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
584   } else {
585     if (type == NORM_FROBENIUS) {
586       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
587       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
588       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
589       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
590       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
591       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
592       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
593       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
594     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
595       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
596       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
597       PetscReal    *rsum,*rsum2,vabs;
598       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
599       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
600       MatScalar    *v;
601 
602       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
603       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
604       /* Amat */
605       v = amat->a; jj = amat->j;
606       for (brow=0; brow<mbs; brow++) {
607         grow = bs*(rstart + brow);
608         nz   = amat->i[brow+1] - amat->i[brow];
609         for (bcol=0; bcol<nz; bcol++) {
610           gcol = bs*(rstart + *jj); jj++;
611           for (col=0; col<bs; col++) {
612             for (row=0; row<bs; row++) {
613               vabs            = PetscAbsScalar(*v); v++;
614               rsum[gcol+col] += vabs;
615               /* non-diagonal block */
616               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
617             }
618           }
619         }
620         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
621       }
622       /* Bmat */
623       v = bmat->a; jj = bmat->j;
624       for (brow=0; brow<mbs; brow++) {
625         grow = bs*(rstart + brow);
626         nz = bmat->i[brow+1] - bmat->i[brow];
627         for (bcol=0; bcol<nz; bcol++) {
628           gcol = bs*garray[*jj]; jj++;
629           for (col=0; col<bs; col++) {
630             for (row=0; row<bs; row++) {
631               vabs            = PetscAbsScalar(*v); v++;
632               rsum[gcol+col] += vabs;
633               rsum[grow+row] += vabs;
634             }
635           }
636         }
637         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
638       }
639       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640       *norm = 0.0;
641       for (col=0; col<mat->cmap->N; col++) {
642         if (rsum2[col] > *norm) *norm = rsum2[col];
643       }
644       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
645     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
646   }
647   PetscFunctionReturn(0);
648 }
649 
650 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
651 {
652   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
653   PetscErrorCode ierr;
654   PetscInt       nstash,reallocs;
655 
656   PetscFunctionBegin;
657   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
658 
659   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
660   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
664   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
665   PetscFunctionReturn(0);
666 }
667 
668 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
671   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
672   PetscErrorCode ierr;
673   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
674   PetscInt       *row,*col;
675   PetscBool      other_disassembled;
676   PetscMPIInt    n;
677   PetscBool      r1,r2,r3;
678   MatScalar      *val;
679 
680   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
681   PetscFunctionBegin;
682   if (!baij->donotstash &&  !mat->nooffprocentries) {
683     while (1) {
684       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
685       if (!flg) break;
686 
687       for (i=0; i<n;) {
688         /* Now identify the consecutive vals belonging to the same row */
689         for (j=i,rstart=row[j]; j<n; j++) {
690           if (row[j] != rstart) break;
691         }
692         if (j < n) ncols = j-i;
693         else       ncols = n-i;
694         /* Now assemble all these values with a single function call */
695         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
696         i    = j;
697       }
698     }
699     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
700     /* Now process the block-stash. Since the values are stashed column-oriented,
701        set the roworiented flag to column oriented, and after MatSetValues()
702        restore the original flags */
703     r1 = baij->roworiented;
704     r2 = a->roworiented;
705     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
706 
707     baij->roworiented = PETSC_FALSE;
708     a->roworiented    = PETSC_FALSE;
709 
710     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
711     while (1) {
712       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
713       if (!flg) break;
714 
715       for (i=0; i<n;) {
716         /* Now identify the consecutive vals belonging to the same row */
717         for (j=i,rstart=row[j]; j<n; j++) {
718           if (row[j] != rstart) break;
719         }
720         if (j < n) ncols = j-i;
721         else       ncols = n-i;
722         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
723         i    = j;
724       }
725     }
726     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
727 
728     baij->roworiented = r1;
729     a->roworiented    = r2;
730 
731     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
732   }
733 
734   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
735   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
736 
737   /* determine if any processor has disassembled, if so we must
738      also disassemble ourselfs, in order that we may reassemble. */
739   /*
740      if nonzero structure of submatrix B cannot change then we know that
741      no processor disassembled thus we can skip this stuff
742   */
743   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
744     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
745     if (mat->was_assembled && !other_disassembled) {
746       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
747     }
748   }
749 
750   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
751     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
752   }
753   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
754   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
755 
756   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
757 
758   baij->rowvalues = 0;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
762     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
769 #include <petscdraw.h>
770 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
771 {
772   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
773   PetscErrorCode    ierr;
774   PetscInt          bs   = mat->rmap->bs;
775   PetscMPIInt       rank = baij->rank;
776   PetscBool         iascii,isdraw;
777   PetscViewer       sviewer;
778   PetscViewerFormat format;
779 
780   PetscFunctionBegin;
781   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
782   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
783   if (iascii) {
784     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
785     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
786       MatInfo info;
787       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
788       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
789       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
790       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
791       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
792       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
793       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
794       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
795       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
796       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
797       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
798       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
799       PetscFunctionReturn(0);
800     } else if (format == PETSC_VIEWER_ASCII_INFO) {
801       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
802       PetscFunctionReturn(0);
803     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
804       PetscFunctionReturn(0);
805     }
806   }
807 
808   if (isdraw) {
809     PetscDraw draw;
810     PetscBool isnull;
811     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
812     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
813     if (isnull) PetscFunctionReturn(0);
814   }
815 
816   {
817     /* assemble the entire matrix onto first processor. */
818     Mat          A;
819     Mat_SeqSBAIJ *Aloc;
820     Mat_SeqBAIJ  *Bloc;
821     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
822     MatScalar    *a;
823     const char   *matname;
824 
825     /* Should this be the same type as mat? */
826     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
827     if (!rank) {
828       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
829     } else {
830       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
831     }
832     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
833     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
834     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
835     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
836 
837     /* copy over the A part */
838     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
839     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
840     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
841 
842     for (i=0; i<mbs; i++) {
843       rvals[0] = bs*(baij->rstartbs + i);
844       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
845       for (j=ai[i]; j<ai[i+1]; j++) {
846         col = (baij->cstartbs+aj[j])*bs;
847         for (k=0; k<bs; k++) {
848           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
849           col++;
850           a += bs;
851         }
852       }
853     }
854     /* copy over the B part */
855     Bloc = (Mat_SeqBAIJ*)baij->B->data;
856     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
857     for (i=0; i<mbs; i++) {
858 
859       rvals[0] = bs*(baij->rstartbs + i);
860       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
861       for (j=ai[i]; j<ai[i+1]; j++) {
862         col = baij->garray[aj[j]]*bs;
863         for (k=0; k<bs; k++) {
864           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
865           col++;
866           a += bs;
867         }
868       }
869     }
870     ierr = PetscFree(rvals);CHKERRQ(ierr);
871     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
872     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
873     /*
874        Everyone has to call to draw the matrix since the graphics waits are
875        synchronized across all processors that share the PetscDraw object
876     */
877     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
878     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
879     if (!rank) {
880       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
881       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
882     }
883     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
884     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
885     ierr = MatDestroy(&A);CHKERRQ(ierr);
886   }
887   PetscFunctionReturn(0);
888 }
889 
890 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
891 {
892   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
893   Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
894   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
895   PetscErrorCode ierr;
896   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
897   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
898   int            fd;
899   PetscScalar    *column_values;
900   FILE           *file;
901   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
902   PetscInt       message_count,flowcontrolcount;
903 
904   PetscFunctionBegin;
905   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
906   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
907   nz   = bs2*(A->nz + B->nz);
908   rlen = mat->rmap->n;
909   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
910   if (!rank) {
911     header[0] = MAT_FILE_CLASSID;
912     header[1] = mat->rmap->N;
913     header[2] = mat->cmap->N;
914 
915     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
916     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
917     /* get largest number of rows any processor has */
918     range = mat->rmap->range;
919     for (i=1; i<size; i++) {
920       rlen = PetscMax(rlen,range[i+1] - range[i]);
921     }
922   } else {
923     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
924   }
925 
926   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
927   /* compute lengths of each row  */
928   for (i=0; i<a->mbs; i++) {
929     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
930   }
931   /* store the row lengths to the file */
932   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
933   if (!rank) {
934     MPI_Status status;
935     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
936     rlen = (range[1] - range[0])/bs;
937     for (i=0; i<rlen; i++) {
938       for (j=0; j<bs; j++) {
939         row_lens[i*bs+j] = bs*crow_lens[i];
940       }
941     }
942     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
943     for (i=1; i<size; i++) {
944       rlen = (range[i+1] - range[i])/bs;
945       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
946       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
947       for (k=0; k<rlen; k++) {
948         for (j=0; j<bs; j++) {
949           row_lens[k*bs+j] = bs*crow_lens[k];
950         }
951       }
952       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
953     }
954     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
955     ierr = PetscFree(row_lens);CHKERRQ(ierr);
956   } else {
957     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
958     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
959     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
960   }
961   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
962 
963   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
964      information needed to make it for each row from a block row. This does require more communication but still not more than
965      the communication needed for the nonzero values  */
966   nzmax = nz; /*  space a largest processor needs */
967   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
968   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
969   cnt   = 0;
970   for (i=0; i<a->mbs; i++) {
971     pcnt = cnt;
972     for (j=B->i[i]; j<B->i[i+1]; j++) {
973       if ((col = garray[B->j[j]]) > cstart) break;
974       for (l=0; l<bs; l++) {
975         column_indices[cnt++] = bs*col+l;
976       }
977     }
978     for (k=A->i[i]; k<A->i[i+1]; k++) {
979       for (l=0; l<bs; l++) {
980         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
981       }
982     }
983     for (; j<B->i[i+1]; j++) {
984       for (l=0; l<bs; l++) {
985         column_indices[cnt++] = bs*garray[B->j[j]]+l;
986       }
987     }
988     len = cnt - pcnt;
989     for (k=1; k<bs; k++) {
990       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
991       cnt += len;
992     }
993   }
994   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
995 
996   /* store the columns to the file */
997   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
998   if (!rank) {
999     MPI_Status status;
1000     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1001     for (i=1; i<size; i++) {
1002       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1003       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1004       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1005       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1006     }
1007     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1008   } else {
1009     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1010     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1011     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1012     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the numerical values */
1017   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<a->mbs; i++) {
1020     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1021     for (j=B->i[i]; j<B->i[i+1]; j++) {
1022       if (garray[B->j[j]] > cstart) break;
1023       for (l=0; l<bs; l++) {
1024         for (ll=0; ll<bs; ll++) {
1025           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1026         }
1027       }
1028       cnt += bs;
1029     }
1030     for (k=A->i[i]; k<A->i[i+1]; k++) {
1031       for (l=0; l<bs; l++) {
1032         for (ll=0; ll<bs; ll++) {
1033           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1034         }
1035       }
1036       cnt += bs;
1037     }
1038     for (; j<B->i[i+1]; j++) {
1039       for (l=0; l<bs; l++) {
1040         for (ll=0; ll<bs; ll++) {
1041           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1042         }
1043       }
1044       cnt += bs;
1045     }
1046     cnt += (bs-1)*rlen;
1047   }
1048   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1049 
1050   /* store the column values to the file */
1051   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1052   if (!rank) {
1053     MPI_Status status;
1054     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1055     for (i=1; i<size; i++) {
1056       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1057       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1058       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1059       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1060     }
1061     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1062   } else {
1063     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1064     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1065     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1066     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1067   }
1068   ierr = PetscFree(column_values);CHKERRQ(ierr);
1069 
1070   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1071   if (file) {
1072     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1073   }
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1078 {
1079   PetscErrorCode ierr;
1080   PetscBool      iascii,isdraw,issocket,isbinary;
1081 
1082   PetscFunctionBegin;
1083   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1084   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1085   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1086   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1087   if (iascii || isdraw || issocket) {
1088     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1089   } else if (isbinary) {
1090     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1091   }
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1096 {
1097   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1098   PetscErrorCode ierr;
1099 
1100   PetscFunctionBegin;
1101 #if defined(PETSC_USE_LOG)
1102   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1103 #endif
1104   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1105   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1106   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1107   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1108 #if defined(PETSC_USE_CTABLE)
1109   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1110 #else
1111   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1112 #endif
1113   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1114   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1115   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1116   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1117   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1118   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1119   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1120   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1121   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1122   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1123   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1124   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1125   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1126   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1127   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1128 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1129   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1130 #endif
1131   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1132   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1133   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1134   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1135 
1136   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1137   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1140 #if defined(PETSC_HAVE_ELEMENTAL)
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1142 #endif
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1147 {
1148   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1149   PetscErrorCode    ierr;
1150   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1151   PetscScalar       *from;
1152   const PetscScalar *x;
1153 
1154   PetscFunctionBegin;
1155   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1156   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1157 
1158   /* diagonal part */
1159   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1160   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1161 
1162   /* subdiagonal part */
1163   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1164 
1165   /* copy x into the vec slvec0 */
1166   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1167   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1168 
1169   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1170   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1171   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1172 
1173   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1174   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1175   /* supperdiagonal part */
1176   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1181 {
1182   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1183   PetscErrorCode    ierr;
1184   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1185   PetscScalar       *from;
1186   const PetscScalar *x;
1187 
1188   PetscFunctionBegin;
1189   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1190   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1191 
1192   /* diagonal part */
1193   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1194   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1195 
1196   /* subdiagonal part */
1197   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1198 
1199   /* copy x into the vec slvec0 */
1200   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1201   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1202 
1203   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1204   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1205   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1206 
1207   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1208   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1209   /* supperdiagonal part */
1210   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1215 {
1216   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1217   PetscErrorCode ierr;
1218   PetscInt       nt;
1219 
1220   PetscFunctionBegin;
1221   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1222   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1223 
1224   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1225   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1226 
1227   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1228   /* do diagonal part */
1229   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1230   /* do supperdiagonal part */
1231   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1232   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1233   /* do subdiagonal part */
1234   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1235   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1236   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1241 {
1242   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1243   PetscErrorCode    ierr;
1244   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1245   PetscScalar       *from,zero=0.0;
1246   const PetscScalar *x;
1247 
1248   PetscFunctionBegin;
1249   /*
1250   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1251   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1252   */
1253   /* diagonal part */
1254   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1255   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1256 
1257   /* subdiagonal part */
1258   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1259 
1260   /* copy x into the vec slvec0 */
1261   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1262   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1263   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1264   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1265 
1266   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1268   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269 
1270   /* supperdiagonal part */
1271   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1276 {
1277   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1278   PetscErrorCode ierr;
1279 
1280   PetscFunctionBegin;
1281   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1282   /* do diagonal part */
1283   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1284   /* do supperdiagonal part */
1285   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1286   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1287 
1288   /* do subdiagonal part */
1289   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1291   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 /*
1296   This only works correctly for square matrices where the subblock A->A is the
1297    diagonal block
1298 */
1299 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1300 {
1301   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1302   PetscErrorCode ierr;
1303 
1304   PetscFunctionBegin;
1305   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1306   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1311 {
1312   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1313   PetscErrorCode ierr;
1314 
1315   PetscFunctionBegin;
1316   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1317   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1322 {
1323   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1324   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1325   PetscErrorCode ierr;
1326   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1327   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1328   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1329 
1330   PetscFunctionBegin;
1331   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1332   mat->getrowactive = PETSC_TRUE;
1333 
1334   if (!mat->rowvalues && (idx || v)) {
1335     /*
1336         allocate enough space to hold information from the longest row.
1337     */
1338     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1339     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1340     PetscInt     max = 1,mbs = mat->mbs,tmp;
1341     for (i=0; i<mbs; i++) {
1342       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1343       if (max < tmp) max = tmp;
1344     }
1345     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1346   }
1347 
1348   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1349   lrow = row - brstart;  /* local row index */
1350 
1351   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1352   if (!v)   {pvA = 0; pvB = 0;}
1353   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1354   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1355   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1356   nztot = nzA + nzB;
1357 
1358   cmap = mat->garray;
1359   if (v  || idx) {
1360     if (nztot) {
1361       /* Sort by increasing column numbers, assuming A and B already sorted */
1362       PetscInt imark = -1;
1363       if (v) {
1364         *v = v_p = mat->rowvalues;
1365         for (i=0; i<nzB; i++) {
1366           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1367           else break;
1368         }
1369         imark = i;
1370         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1371         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1372       }
1373       if (idx) {
1374         *idx = idx_p = mat->rowindices;
1375         if (imark > -1) {
1376           for (i=0; i<imark; i++) {
1377             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1378           }
1379         } else {
1380           for (i=0; i<nzB; i++) {
1381             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1382             else break;
1383           }
1384           imark = i;
1385         }
1386         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1387         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1388       }
1389     } else {
1390       if (idx) *idx = 0;
1391       if (v)   *v   = 0;
1392     }
1393   }
1394   *nz  = nztot;
1395   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1396   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1401 {
1402   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1403 
1404   PetscFunctionBegin;
1405   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1406   baij->getrowactive = PETSC_FALSE;
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1411 {
1412   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1413   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1414 
1415   PetscFunctionBegin;
1416   aA->getrow_utriangular = PETSC_TRUE;
1417   PetscFunctionReturn(0);
1418 }
1419 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1420 {
1421   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1422   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1423 
1424   PetscFunctionBegin;
1425   aA->getrow_utriangular = PETSC_FALSE;
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1430 {
1431   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1432   PetscErrorCode ierr;
1433 
1434   PetscFunctionBegin;
1435   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1436   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1441 {
1442   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1443   PetscErrorCode ierr;
1444 
1445   PetscFunctionBegin;
1446   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1447   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1452    Input: isrow       - distributed(parallel),
1453           iscol_local - locally owned (seq)
1454 */
1455 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1456 {
1457   PetscErrorCode ierr;
1458   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1459   const PetscInt *ptr1,*ptr2;
1460 
1461   PetscFunctionBegin;
1462   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1463   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1464   if (sz1 > sz2) {
1465     *flg = PETSC_FALSE;
1466     PetscFunctionReturn(0);
1467   }
1468 
1469   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1470   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1471 
1472   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1473   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1474   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1475   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1476   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1477   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1478 
1479   nmatch=0;
1480   k     = 0;
1481   for (i=0; i<sz1; i++){
1482     for (j=k; j<sz2; j++){
1483       if (a1[i] == a2[j]) {
1484         k = j; nmatch++;
1485         break;
1486       }
1487     }
1488   }
1489   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1490   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1491   ierr = PetscFree(a1);CHKERRQ(ierr);
1492   ierr = PetscFree(a2);CHKERRQ(ierr);
1493   if (nmatch < sz1) {
1494     *flg = PETSC_FALSE;
1495   } else {
1496     *flg = PETSC_TRUE;
1497   }
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1502 {
1503   PetscErrorCode ierr;
1504   IS             iscol_local;
1505   PetscInt       csize;
1506   PetscBool      isequal;
1507 
1508   PetscFunctionBegin;
1509   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1510   if (call == MAT_REUSE_MATRIX) {
1511     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1512     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1513   } else {
1514     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1515     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1516     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1517   }
1518 
1519   /* now call MatCreateSubMatrix_MPIBAIJ() */
1520   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1521   if (call == MAT_INITIAL_MATRIX) {
1522     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1523     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1524   }
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1529 {
1530   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1531   PetscErrorCode ierr;
1532 
1533   PetscFunctionBegin;
1534   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1535   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1540 {
1541   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1542   Mat            A  = a->A,B = a->B;
1543   PetscErrorCode ierr;
1544   PetscReal      isend[5],irecv[5];
1545 
1546   PetscFunctionBegin;
1547   info->block_size = (PetscReal)matin->rmap->bs;
1548 
1549   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1550 
1551   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1552   isend[3] = info->memory;  isend[4] = info->mallocs;
1553 
1554   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1555 
1556   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1557   isend[3] += info->memory;  isend[4] += info->mallocs;
1558   if (flag == MAT_LOCAL) {
1559     info->nz_used      = isend[0];
1560     info->nz_allocated = isend[1];
1561     info->nz_unneeded  = isend[2];
1562     info->memory       = isend[3];
1563     info->mallocs      = isend[4];
1564   } else if (flag == MAT_GLOBAL_MAX) {
1565     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1566 
1567     info->nz_used      = irecv[0];
1568     info->nz_allocated = irecv[1];
1569     info->nz_unneeded  = irecv[2];
1570     info->memory       = irecv[3];
1571     info->mallocs      = irecv[4];
1572   } else if (flag == MAT_GLOBAL_SUM) {
1573     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1574 
1575     info->nz_used      = irecv[0];
1576     info->nz_allocated = irecv[1];
1577     info->nz_unneeded  = irecv[2];
1578     info->memory       = irecv[3];
1579     info->mallocs      = irecv[4];
1580   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1581   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1582   info->fill_ratio_needed = 0;
1583   info->factor_mallocs    = 0;
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1588 {
1589   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1590   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1591   PetscErrorCode ierr;
1592 
1593   PetscFunctionBegin;
1594   switch (op) {
1595   case MAT_NEW_NONZERO_LOCATIONS:
1596   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1597   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1598   case MAT_KEEP_NONZERO_PATTERN:
1599   case MAT_SUBMAT_SINGLEIS:
1600   case MAT_NEW_NONZERO_LOCATION_ERR:
1601     MatCheckPreallocated(A,1);
1602     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1603     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1604     break;
1605   case MAT_ROW_ORIENTED:
1606     MatCheckPreallocated(A,1);
1607     a->roworiented = flg;
1608 
1609     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1610     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1611     break;
1612   case MAT_NEW_DIAGONALS:
1613     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1614     break;
1615   case MAT_IGNORE_OFF_PROC_ENTRIES:
1616     a->donotstash = flg;
1617     break;
1618   case MAT_USE_HASH_TABLE:
1619     a->ht_flag = flg;
1620     break;
1621   case MAT_HERMITIAN:
1622     MatCheckPreallocated(A,1);
1623     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1624     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1625 #if defined(PETSC_USE_COMPLEX)
1626     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1627 #endif
1628     break;
1629   case MAT_SPD:
1630     A->spd_set = PETSC_TRUE;
1631     A->spd     = flg;
1632     if (flg) {
1633       A->symmetric                  = PETSC_TRUE;
1634       A->structurally_symmetric     = PETSC_TRUE;
1635       A->symmetric_set              = PETSC_TRUE;
1636       A->structurally_symmetric_set = PETSC_TRUE;
1637     }
1638     break;
1639   case MAT_SYMMETRIC:
1640     MatCheckPreallocated(A,1);
1641     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1642     break;
1643   case MAT_STRUCTURALLY_SYMMETRIC:
1644     MatCheckPreallocated(A,1);
1645     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1646     break;
1647   case MAT_SYMMETRY_ETERNAL:
1648     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1649     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1650     break;
1651   case MAT_IGNORE_LOWER_TRIANGULAR:
1652     aA->ignore_ltriangular = flg;
1653     break;
1654   case MAT_ERROR_LOWER_TRIANGULAR:
1655     aA->ignore_ltriangular = flg;
1656     break;
1657   case MAT_GETROW_UPPERTRIANGULAR:
1658     aA->getrow_utriangular = flg;
1659     break;
1660   default:
1661     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1662   }
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1667 {
1668   PetscErrorCode ierr;
1669 
1670   PetscFunctionBegin;
1671   if (reuse == MAT_INITIAL_MATRIX) {
1672     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1673   }  else if (reuse == MAT_REUSE_MATRIX) {
1674     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1675   }
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1680 {
1681   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1682   Mat            a     = baij->A, b=baij->B;
1683   PetscErrorCode ierr;
1684   PetscInt       nv,m,n;
1685   PetscBool      flg;
1686 
1687   PetscFunctionBegin;
1688   if (ll != rr) {
1689     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1690     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1691   }
1692   if (!ll) PetscFunctionReturn(0);
1693 
1694   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1695   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1696 
1697   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1698   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1699 
1700   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1701 
1702   /* left diagonalscale the off-diagonal part */
1703   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1704 
1705   /* scale the diagonal part */
1706   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1707 
1708   /* right diagonalscale the off-diagonal part */
1709   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1710   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1715 {
1716   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1717   PetscErrorCode ierr;
1718 
1719   PetscFunctionBegin;
1720   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1725 
1726 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1727 {
1728   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1729   Mat            a,b,c,d;
1730   PetscBool      flg;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   a = matA->A; b = matA->B;
1735   c = matB->A; d = matB->B;
1736 
1737   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1738   if (flg) {
1739     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1740   }
1741   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1746 {
1747   PetscErrorCode ierr;
1748   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1749   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1750 
1751   PetscFunctionBegin;
1752   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1753   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1754     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1755     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1756     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1757   } else {
1758     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1759     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1760   }
1761   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1766 {
1767   PetscErrorCode ierr;
1768 
1769   PetscFunctionBegin;
1770   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1771   PetscFunctionReturn(0);
1772 }
1773 
1774 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1775 {
1776   PetscErrorCode ierr;
1777   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1778   PetscBLASInt   bnz,one=1;
1779   Mat_SeqSBAIJ   *xa,*ya;
1780   Mat_SeqBAIJ    *xb,*yb;
1781 
1782   PetscFunctionBegin;
1783   if (str == SAME_NONZERO_PATTERN) {
1784     PetscScalar alpha = a;
1785     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1786     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1787     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1788     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1789     xb   = (Mat_SeqBAIJ*)xx->B->data;
1790     yb   = (Mat_SeqBAIJ*)yy->B->data;
1791     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1792     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1793     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1794   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1795     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1796     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1797     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1798   } else {
1799     Mat      B;
1800     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1801     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1802     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1803     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1804     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1805     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1806     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1807     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1808     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1809     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1810     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1811     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1812     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1813     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1814     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1815     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1816     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1817     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1818     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1819     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1820   }
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1825 {
1826   PetscErrorCode ierr;
1827   PetscInt       i;
1828   PetscBool      flg;
1829 
1830   PetscFunctionBegin;
1831   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1832   for (i=0; i<n; i++) {
1833     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1834     if (!flg) {
1835       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1836     }
1837   }
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1842 {
1843   PetscErrorCode ierr;
1844   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1845   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1846 
1847   PetscFunctionBegin;
1848   if (!Y->preallocated) {
1849     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1850   } else if (!aij->nz) {
1851     PetscInt nonew = aij->nonew;
1852     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1853     aij->nonew = nonew;
1854   }
1855   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1860 {
1861   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1866   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1867   if (d) {
1868     PetscInt rstart;
1869     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1870     *d += rstart/A->rmap->bs;
1871 
1872   }
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1877 {
1878   PetscFunctionBegin;
1879   *a = ((Mat_MPISBAIJ*)A->data)->A;
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 /* -------------------------------------------------------------------*/
1884 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1885                                        MatGetRow_MPISBAIJ,
1886                                        MatRestoreRow_MPISBAIJ,
1887                                        MatMult_MPISBAIJ,
1888                                /*  4*/ MatMultAdd_MPISBAIJ,
1889                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1890                                        MatMultAdd_MPISBAIJ,
1891                                        0,
1892                                        0,
1893                                        0,
1894                                /* 10*/ 0,
1895                                        0,
1896                                        0,
1897                                        MatSOR_MPISBAIJ,
1898                                        MatTranspose_MPISBAIJ,
1899                                /* 15*/ MatGetInfo_MPISBAIJ,
1900                                        MatEqual_MPISBAIJ,
1901                                        MatGetDiagonal_MPISBAIJ,
1902                                        MatDiagonalScale_MPISBAIJ,
1903                                        MatNorm_MPISBAIJ,
1904                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1905                                        MatAssemblyEnd_MPISBAIJ,
1906                                        MatSetOption_MPISBAIJ,
1907                                        MatZeroEntries_MPISBAIJ,
1908                                /* 24*/ 0,
1909                                        0,
1910                                        0,
1911                                        0,
1912                                        0,
1913                                /* 29*/ MatSetUp_MPISBAIJ,
1914                                        0,
1915                                        0,
1916                                        MatGetDiagonalBlock_MPISBAIJ,
1917                                        0,
1918                                /* 34*/ MatDuplicate_MPISBAIJ,
1919                                        0,
1920                                        0,
1921                                        0,
1922                                        0,
1923                                /* 39*/ MatAXPY_MPISBAIJ,
1924                                        MatCreateSubMatrices_MPISBAIJ,
1925                                        MatIncreaseOverlap_MPISBAIJ,
1926                                        MatGetValues_MPISBAIJ,
1927                                        MatCopy_MPISBAIJ,
1928                                /* 44*/ 0,
1929                                        MatScale_MPISBAIJ,
1930                                        MatShift_MPISBAIJ,
1931                                        0,
1932                                        0,
1933                                /* 49*/ 0,
1934                                        0,
1935                                        0,
1936                                        0,
1937                                        0,
1938                                /* 54*/ 0,
1939                                        0,
1940                                        MatSetUnfactored_MPISBAIJ,
1941                                        0,
1942                                        MatSetValuesBlocked_MPISBAIJ,
1943                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1944                                        0,
1945                                        0,
1946                                        0,
1947                                        0,
1948                                /* 64*/ 0,
1949                                        0,
1950                                        0,
1951                                        0,
1952                                        0,
1953                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1954                                        0,
1955                                        0,
1956                                        0,
1957                                        0,
1958                                /* 74*/ 0,
1959                                        0,
1960                                        0,
1961                                        0,
1962                                        0,
1963                                /* 79*/ 0,
1964                                        0,
1965                                        0,
1966                                        0,
1967                                        MatLoad_MPISBAIJ,
1968                                /* 84*/ 0,
1969                                        0,
1970                                        0,
1971                                        0,
1972                                        0,
1973                                /* 89*/ 0,
1974                                        0,
1975                                        0,
1976                                        0,
1977                                        0,
1978                                /* 94*/ 0,
1979                                        0,
1980                                        0,
1981                                        0,
1982                                        0,
1983                                /* 99*/ 0,
1984                                        0,
1985                                        0,
1986                                        0,
1987                                        0,
1988                                /*104*/ 0,
1989                                        MatRealPart_MPISBAIJ,
1990                                        MatImaginaryPart_MPISBAIJ,
1991                                        MatGetRowUpperTriangular_MPISBAIJ,
1992                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1993                                /*109*/ 0,
1994                                        0,
1995                                        0,
1996                                        0,
1997                                        MatMissingDiagonal_MPISBAIJ,
1998                                /*114*/ 0,
1999                                        0,
2000                                        0,
2001                                        0,
2002                                        0,
2003                                /*119*/ 0,
2004                                        0,
2005                                        0,
2006                                        0,
2007                                        0,
2008                                /*124*/ 0,
2009                                        0,
2010                                        0,
2011                                        0,
2012                                        0,
2013                                /*129*/ 0,
2014                                        0,
2015                                        0,
2016                                        0,
2017                                        0,
2018                                /*134*/ 0,
2019                                        0,
2020                                        0,
2021                                        0,
2022                                        0,
2023                                /*139*/ MatSetBlockSizes_Default,
2024                                        0,
2025                                        0,
2026                                        0,
2027                                        0,
2028                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2029 };
2030 
2031 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2032 {
2033   Mat_MPISBAIJ   *b;
2034   PetscErrorCode ierr;
2035   PetscInt       i,mbs,Mbs;
2036 
2037   PetscFunctionBegin;
2038   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2039   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2040   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2041   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2042 
2043   b   = (Mat_MPISBAIJ*)B->data;
2044   mbs = B->rmap->n/bs;
2045   Mbs = B->rmap->N/bs;
2046   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
2047 
2048   B->rmap->bs = bs;
2049   b->bs2      = bs*bs;
2050   b->mbs      = mbs;
2051   b->Mbs      = Mbs;
2052   b->nbs      = B->cmap->n/bs;
2053   b->Nbs      = B->cmap->N/bs;
2054 
2055   for (i=0; i<=b->size; i++) {
2056     b->rangebs[i] = B->rmap->range[i]/bs;
2057   }
2058   b->rstartbs = B->rmap->rstart/bs;
2059   b->rendbs   = B->rmap->rend/bs;
2060 
2061   b->cstartbs = B->cmap->rstart/bs;
2062   b->cendbs   = B->cmap->rend/bs;
2063 
2064 #if defined(PETSC_USE_CTABLE)
2065   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2066 #else
2067   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2068 #endif
2069   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2070   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2071   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2072   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2073   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2074   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2075   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2076   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2077   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2078 
2079   /* Because the B will have been resized we simply destroy it and create a new one each time */
2080   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2081   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2082   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2083   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2084   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2085 
2086   if (!B->preallocated) {
2087     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2088     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2089     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2090     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2091     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2092   }
2093 
2094   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2095   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2096 
2097   B->preallocated  = PETSC_TRUE;
2098   B->was_assembled = PETSC_FALSE;
2099   B->assembled     = PETSC_FALSE;
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2104 {
2105   PetscInt       m,rstart,cstart,cend;
2106   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2107   const PetscInt *JJ    =0;
2108   PetscScalar    *values=0;
2109   PetscErrorCode ierr;
2110 
2111   PetscFunctionBegin;
2112   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2113   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2114   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2115   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2116   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2117   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2118   m      = B->rmap->n/bs;
2119   rstart = B->rmap->rstart/bs;
2120   cstart = B->cmap->rstart/bs;
2121   cend   = B->cmap->rend/bs;
2122 
2123   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2124   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2125   for (i=0; i<m; i++) {
2126     nz = ii[i+1] - ii[i];
2127     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2128     nz_max = PetscMax(nz_max,nz);
2129     JJ     = jj + ii[i];
2130     for (j=0; j<nz; j++) {
2131       if (*JJ >= cstart) break;
2132       JJ++;
2133     }
2134     d = 0;
2135     for (; j<nz; j++) {
2136       if (*JJ++ >= cend) break;
2137       d++;
2138     }
2139     d_nnz[i] = d;
2140     o_nnz[i] = nz - d;
2141   }
2142   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2143   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2144 
2145   values = (PetscScalar*)V;
2146   if (!values) {
2147     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2148     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2149   }
2150   for (i=0; i<m; i++) {
2151     PetscInt          row    = i + rstart;
2152     PetscInt          ncols  = ii[i+1] - ii[i];
2153     const PetscInt    *icols = jj + ii[i];
2154     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2155     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2156   }
2157 
2158   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2159   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2160   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2161   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 /*MC
2166    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2167    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2168    the matrix is stored.
2169 
2170   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2171   can call MatSetOption(Mat, MAT_HERMITIAN);
2172 
2173    Options Database Keys:
2174 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2175 
2176   Level: beginner
2177 
2178 .seealso: MatCreateMPISBAIJ
2179 M*/
2180 
2181 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2182 {
2183   Mat_MPISBAIJ   *b;
2184   PetscErrorCode ierr;
2185   PetscBool      flg = PETSC_FALSE;
2186 
2187   PetscFunctionBegin;
2188   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2189   B->data = (void*)b;
2190   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2191 
2192   B->ops->destroy = MatDestroy_MPISBAIJ;
2193   B->ops->view    = MatView_MPISBAIJ;
2194   B->assembled    = PETSC_FALSE;
2195   B->insertmode   = NOT_SET_VALUES;
2196 
2197   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2198   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2199 
2200   /* build local table of row and column ownerships */
2201   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2202 
2203   /* build cache for off array entries formed */
2204   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2205 
2206   b->donotstash  = PETSC_FALSE;
2207   b->colmap      = NULL;
2208   b->garray      = NULL;
2209   b->roworiented = PETSC_TRUE;
2210 
2211   /* stuff used in block assembly */
2212   b->barray = 0;
2213 
2214   /* stuff used for matrix vector multiply */
2215   b->lvec    = 0;
2216   b->Mvctx   = 0;
2217   b->slvec0  = 0;
2218   b->slvec0b = 0;
2219   b->slvec1  = 0;
2220   b->slvec1a = 0;
2221   b->slvec1b = 0;
2222   b->sMvctx  = 0;
2223 
2224   /* stuff for MatGetRow() */
2225   b->rowindices   = 0;
2226   b->rowvalues    = 0;
2227   b->getrowactive = PETSC_FALSE;
2228 
2229   /* hash table stuff */
2230   b->ht           = 0;
2231   b->hd           = 0;
2232   b->ht_size      = 0;
2233   b->ht_flag      = PETSC_FALSE;
2234   b->ht_fact      = 0;
2235   b->ht_total_ct  = 0;
2236   b->ht_insert_ct = 0;
2237 
2238   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2239   b->ijonly = PETSC_FALSE;
2240 
2241   b->in_loc = 0;
2242   b->v_loc  = 0;
2243   b->n_loc  = 0;
2244 
2245   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2246   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2247   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2248   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2249 #if defined(PETSC_HAVE_ELEMENTAL)
2250   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2251 #endif
2252 
2253   B->symmetric                  = PETSC_TRUE;
2254   B->structurally_symmetric     = PETSC_TRUE;
2255   B->symmetric_set              = PETSC_TRUE;
2256   B->structurally_symmetric_set = PETSC_TRUE;
2257   B->symmetric_eternal          = PETSC_TRUE;
2258 
2259   B->hermitian                  = PETSC_FALSE;
2260   B->hermitian_set              = PETSC_FALSE;
2261 
2262   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2263   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2264   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2265   if (flg) {
2266     PetscReal fact = 1.39;
2267     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2268     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2269     if (fact <= 1.0) fact = 1.39;
2270     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2271     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2272   }
2273   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 /*MC
2278    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2279 
2280    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2281    and MATMPISBAIJ otherwise.
2282 
2283    Options Database Keys:
2284 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2285 
2286   Level: beginner
2287 
2288 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2289 M*/
2290 
2291 /*@C
2292    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2293    the user should preallocate the matrix storage by setting the parameters
2294    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2295    performance can be increased by more than a factor of 50.
2296 
2297    Collective on Mat
2298 
2299    Input Parameters:
2300 +  B - the matrix
2301 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2302           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2303 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2304            submatrix  (same for all local rows)
2305 .  d_nnz - array containing the number of block nonzeros in the various block rows
2306            in the upper triangular and diagonal part of the in diagonal portion of the local
2307            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2308            for the diagonal entry and set a value even if it is zero.
2309 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2310            submatrix (same for all local rows).
2311 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2312            off-diagonal portion of the local submatrix that is right of the diagonal
2313            (possibly different for each block row) or NULL.
2314 
2315 
2316    Options Database Keys:
2317 .   -mat_no_unroll - uses code that does not unroll the loops in the
2318                      block calculations (much slower)
2319 .   -mat_block_size - size of the blocks to use
2320 
2321    Notes:
2322 
2323    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2324    than it must be used on all processors that share the object for that argument.
2325 
2326    If the *_nnz parameter is given then the *_nz parameter is ignored
2327 
2328    Storage Information:
2329    For a square global matrix we define each processor's diagonal portion
2330    to be its local rows and the corresponding columns (a square submatrix);
2331    each processor's off-diagonal portion encompasses the remainder of the
2332    local matrix (a rectangular submatrix).
2333 
2334    The user can specify preallocated storage for the diagonal part of
2335    the local submatrix with either d_nz or d_nnz (not both).  Set
2336    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2337    memory allocation.  Likewise, specify preallocated storage for the
2338    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2339 
2340    You can call MatGetInfo() to get information on how effective the preallocation was;
2341    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2342    You can also run with the option -info and look for messages with the string
2343    malloc in them to see if additional memory allocation was needed.
2344 
2345    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2346    the figure below we depict these three local rows and all columns (0-11).
2347 
2348 .vb
2349            0 1 2 3 4 5 6 7 8 9 10 11
2350           --------------------------
2351    row 3  |. . . d d d o o o o  o  o
2352    row 4  |. . . d d d o o o o  o  o
2353    row 5  |. . . d d d o o o o  o  o
2354           --------------------------
2355 .ve
2356 
2357    Thus, any entries in the d locations are stored in the d (diagonal)
2358    submatrix, and any entries in the o locations are stored in the
2359    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2360    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2361 
2362    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2363    plus the diagonal part of the d matrix,
2364    and o_nz should indicate the number of block nonzeros per row in the o matrix
2365 
2366    In general, for PDE problems in which most nonzeros are near the diagonal,
2367    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2368    or you will get TERRIBLE performance; see the users' manual chapter on
2369    matrices.
2370 
2371    Level: intermediate
2372 
2373 .keywords: matrix, block, aij, compressed row, sparse, parallel
2374 
2375 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2376 @*/
2377 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2378 {
2379   PetscErrorCode ierr;
2380 
2381   PetscFunctionBegin;
2382   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2383   PetscValidType(B,1);
2384   PetscValidLogicalCollectiveInt(B,bs,2);
2385   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 /*@C
2390    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2391    (block compressed row).  For good matrix assembly performance
2392    the user should preallocate the matrix storage by setting the parameters
2393    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2394    performance can be increased by more than a factor of 50.
2395 
2396    Collective on MPI_Comm
2397 
2398    Input Parameters:
2399 +  comm - MPI communicator
2400 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2401           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2402 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2403            This value should be the same as the local size used in creating the
2404            y vector for the matrix-vector product y = Ax.
2405 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2406            This value should be the same as the local size used in creating the
2407            x vector for the matrix-vector product y = Ax.
2408 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2409 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2410 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2411            submatrix  (same for all local rows)
2412 .  d_nnz - array containing the number of block nonzeros in the various block rows
2413            in the upper triangular portion of the in diagonal portion of the local
2414            (possibly different for each block block row) or NULL.
2415            If you plan to factor the matrix you must leave room for the diagonal entry and
2416            set its value even if it is zero.
2417 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2418            submatrix (same for all local rows).
2419 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2420            off-diagonal portion of the local submatrix (possibly different for
2421            each block row) or NULL.
2422 
2423    Output Parameter:
2424 .  A - the matrix
2425 
2426    Options Database Keys:
2427 .   -mat_no_unroll - uses code that does not unroll the loops in the
2428                      block calculations (much slower)
2429 .   -mat_block_size - size of the blocks to use
2430 .   -mat_mpi - use the parallel matrix data structures even on one processor
2431                (defaults to using SeqBAIJ format on one processor)
2432 
2433    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2434    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2435    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2436 
2437    Notes:
2438    The number of rows and columns must be divisible by blocksize.
2439    This matrix type does not support complex Hermitian operation.
2440 
2441    The user MUST specify either the local or global matrix dimensions
2442    (possibly both).
2443 
2444    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2445    than it must be used on all processors that share the object for that argument.
2446 
2447    If the *_nnz parameter is given then the *_nz parameter is ignored
2448 
2449    Storage Information:
2450    For a square global matrix we define each processor's diagonal portion
2451    to be its local rows and the corresponding columns (a square submatrix);
2452    each processor's off-diagonal portion encompasses the remainder of the
2453    local matrix (a rectangular submatrix).
2454 
2455    The user can specify preallocated storage for the diagonal part of
2456    the local submatrix with either d_nz or d_nnz (not both).  Set
2457    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2458    memory allocation.  Likewise, specify preallocated storage for the
2459    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2460 
2461    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2462    the figure below we depict these three local rows and all columns (0-11).
2463 
2464 .vb
2465            0 1 2 3 4 5 6 7 8 9 10 11
2466           --------------------------
2467    row 3  |. . . d d d o o o o  o  o
2468    row 4  |. . . d d d o o o o  o  o
2469    row 5  |. . . d d d o o o o  o  o
2470           --------------------------
2471 .ve
2472 
2473    Thus, any entries in the d locations are stored in the d (diagonal)
2474    submatrix, and any entries in the o locations are stored in the
2475    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2476    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2477 
2478    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2479    plus the diagonal part of the d matrix,
2480    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2481    In general, for PDE problems in which most nonzeros are near the diagonal,
2482    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2483    or you will get TERRIBLE performance; see the users' manual chapter on
2484    matrices.
2485 
2486    Level: intermediate
2487 
2488 .keywords: matrix, block, aij, compressed row, sparse, parallel
2489 
2490 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2491 @*/
2492 
2493 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2494 {
2495   PetscErrorCode ierr;
2496   PetscMPIInt    size;
2497 
2498   PetscFunctionBegin;
2499   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2500   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2501   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2502   if (size > 1) {
2503     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2504     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2505   } else {
2506     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2507     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2508   }
2509   PetscFunctionReturn(0);
2510 }
2511 
2512 
2513 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2514 {
2515   Mat            mat;
2516   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2517   PetscErrorCode ierr;
2518   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2519   PetscScalar    *array;
2520 
2521   PetscFunctionBegin;
2522   *newmat = 0;
2523 
2524   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2525   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2526   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2527   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2528   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2529 
2530   mat->factortype   = matin->factortype;
2531   mat->preallocated = PETSC_TRUE;
2532   mat->assembled    = PETSC_TRUE;
2533   mat->insertmode   = NOT_SET_VALUES;
2534 
2535   a      = (Mat_MPISBAIJ*)mat->data;
2536   a->bs2 = oldmat->bs2;
2537   a->mbs = oldmat->mbs;
2538   a->nbs = oldmat->nbs;
2539   a->Mbs = oldmat->Mbs;
2540   a->Nbs = oldmat->Nbs;
2541 
2542   a->size         = oldmat->size;
2543   a->rank         = oldmat->rank;
2544   a->donotstash   = oldmat->donotstash;
2545   a->roworiented  = oldmat->roworiented;
2546   a->rowindices   = 0;
2547   a->rowvalues    = 0;
2548   a->getrowactive = PETSC_FALSE;
2549   a->barray       = 0;
2550   a->rstartbs     = oldmat->rstartbs;
2551   a->rendbs       = oldmat->rendbs;
2552   a->cstartbs     = oldmat->cstartbs;
2553   a->cendbs       = oldmat->cendbs;
2554 
2555   /* hash table stuff */
2556   a->ht           = 0;
2557   a->hd           = 0;
2558   a->ht_size      = 0;
2559   a->ht_flag      = oldmat->ht_flag;
2560   a->ht_fact      = oldmat->ht_fact;
2561   a->ht_total_ct  = 0;
2562   a->ht_insert_ct = 0;
2563 
2564   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2565   if (oldmat->colmap) {
2566 #if defined(PETSC_USE_CTABLE)
2567     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2568 #else
2569     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2570     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2571     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2572 #endif
2573   } else a->colmap = 0;
2574 
2575   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2576     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2577     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2578     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2579   } else a->garray = 0;
2580 
2581   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2582   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2583   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2584   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2585   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2586 
2587   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2588   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2589   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2590   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2591 
2592   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2593   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2594   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2595   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2596   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2597   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2598   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2599   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2600   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2601   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2602   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2603   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2604   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2605 
2606   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2607   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2608   a->sMvctx = oldmat->sMvctx;
2609   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2610 
2611   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2612   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2613   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2614   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2615   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2616   *newmat = mat;
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2621 {
2622   PetscErrorCode ierr;
2623   PetscInt       i,nz,j,rstart,rend;
2624   PetscScalar    *vals,*buf;
2625   MPI_Comm       comm;
2626   MPI_Status     status;
2627   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2628   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2629   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2630   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2631   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2632   PetscInt       dcount,kmax,k,nzcount,tmp;
2633   int            fd;
2634   PetscBool      isbinary;
2635 
2636   PetscFunctionBegin;
2637   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2638   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
2639 
2640   /* force binary viewer to load .info file if it has not yet done so */
2641   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2642   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2643   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2644   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2645   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2646   if (bs < 0) bs = 1;
2647 
2648   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2649   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2650   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2651   if (!rank) {
2652     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2653     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2654     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2655   }
2656 
2657   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2658   M    = header[1];
2659   N    = header[2];
2660 
2661   /* If global sizes are set, check if they are consistent with that given in the file */
2662   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2663   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2664 
2665   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2666 
2667   /*
2668      This code adds extra rows to make sure the number of rows is
2669      divisible by the blocksize
2670   */
2671   Mbs        = M/bs;
2672   extra_rows = bs - M + bs*(Mbs);
2673   if (extra_rows == bs) extra_rows = 0;
2674   else                  Mbs++;
2675   if (extra_rows &&!rank) {
2676     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2677   }
2678 
2679   /* determine ownership of all rows */
2680   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2681     mbs = Mbs/size + ((Mbs % size) > rank);
2682     m   = mbs*bs;
2683   } else { /* User Set */
2684     m   = newmat->rmap->n;
2685     mbs = m/bs;
2686   }
2687   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2688   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2689   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2690   rowners[0] = 0;
2691   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2692   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2693   rstart = rowners[rank];
2694   rend   = rowners[rank+1];
2695 
2696   /* distribute row lengths to all processors */
2697   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2698   if (!rank) {
2699     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2700     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2701     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2702     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2703     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2704     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2705     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2706   } else {
2707     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2708   }
2709 
2710   if (!rank) {   /* procs[0] */
2711     /* calculate the number of nonzeros on each processor */
2712     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2713     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2714     for (i=0; i<size; i++) {
2715       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2716         procsnz[i] += rowlengths[j];
2717       }
2718     }
2719     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2720 
2721     /* determine max buffer needed and allocate it */
2722     maxnz = 0;
2723     for (i=0; i<size; i++) {
2724       maxnz = PetscMax(maxnz,procsnz[i]);
2725     }
2726     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2727 
2728     /* read in my part of the matrix column indices  */
2729     nz     = procsnz[0];
2730     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2731     mycols = ibuf;
2732     if (size == 1) nz -= extra_rows;
2733     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2734     if (size == 1) {
2735       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2736     }
2737 
2738     /* read in every ones (except the last) and ship off */
2739     for (i=1; i<size-1; i++) {
2740       nz   = procsnz[i];
2741       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2742       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2743     }
2744     /* read in the stuff for the last proc */
2745     if (size != 1) {
2746       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2747       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2748       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2749       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2750     }
2751     ierr = PetscFree(cols);CHKERRQ(ierr);
2752   } else {  /* procs[i], i>0 */
2753     /* determine buffer space needed for message */
2754     nz = 0;
2755     for (i=0; i<m; i++) nz += locrowlens[i];
2756     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2757     mycols = ibuf;
2758     /* receive message of column indices*/
2759     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2760     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2761     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2762   }
2763 
2764   /* loop over local rows, determining number of off diagonal entries */
2765   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2766   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2767   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2768   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2769   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2770   rowcount = 0;
2771   nzcount  = 0;
2772   for (i=0; i<mbs; i++) {
2773     dcount  = 0;
2774     odcount = 0;
2775     for (j=0; j<bs; j++) {
2776       kmax = locrowlens[rowcount];
2777       for (k=0; k<kmax; k++) {
2778         tmp = mycols[nzcount++]/bs; /* block col. index */
2779         if (!mask[tmp]) {
2780           mask[tmp] = 1;
2781           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2782           else masked1[dcount++] = tmp; /* entry in diag portion */
2783         }
2784       }
2785       rowcount++;
2786     }
2787 
2788     dlens[i]  = dcount;  /* d_nzz[i] */
2789     odlens[i] = odcount; /* o_nzz[i] */
2790 
2791     /* zero out the mask elements we set */
2792     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2793     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2794   }
2795   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2796   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2797   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2798 
2799   if (!rank) {
2800     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2801     /* read in my part of the matrix numerical values  */
2802     nz     = procsnz[0];
2803     vals   = buf;
2804     mycols = ibuf;
2805     if (size == 1) nz -= extra_rows;
2806     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2807     if (size == 1) {
2808       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2809     }
2810 
2811     /* insert into matrix */
2812     jj = rstart*bs;
2813     for (i=0; i<m; i++) {
2814       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2815       mycols += locrowlens[i];
2816       vals   += locrowlens[i];
2817       jj++;
2818     }
2819 
2820     /* read in other processors (except the last one) and ship out */
2821     for (i=1; i<size-1; i++) {
2822       nz   = procsnz[i];
2823       vals = buf;
2824       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2825       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2826     }
2827     /* the last proc */
2828     if (size != 1) {
2829       nz   = procsnz[i] - extra_rows;
2830       vals = buf;
2831       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2832       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2833       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2834     }
2835     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2836 
2837   } else {
2838     /* receive numeric values */
2839     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2840 
2841     /* receive message of values*/
2842     vals   = buf;
2843     mycols = ibuf;
2844     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2845     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2846     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2847 
2848     /* insert into matrix */
2849     jj = rstart*bs;
2850     for (i=0; i<m; i++) {
2851       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2852       mycols += locrowlens[i];
2853       vals   += locrowlens[i];
2854       jj++;
2855     }
2856   }
2857 
2858   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2859   ierr = PetscFree(buf);CHKERRQ(ierr);
2860   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2861   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2862   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2863   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2864   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2865   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2866   PetscFunctionReturn(0);
2867 }
2868 
2869 /*XXXXX@
2870    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2871 
2872    Input Parameters:
2873 .  mat  - the matrix
2874 .  fact - factor
2875 
2876    Not Collective on Mat, each process can have a different hash factor
2877 
2878    Level: advanced
2879 
2880   Notes:
2881    This can also be set by the command line option: -mat_use_hash_table fact
2882 
2883 .keywords: matrix, hashtable, factor, HT
2884 
2885 .seealso: MatSetOption()
2886 @XXXXX*/
2887 
2888 
2889 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2890 {
2891   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2892   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2893   PetscReal      atmp;
2894   PetscReal      *work,*svalues,*rvalues;
2895   PetscErrorCode ierr;
2896   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2897   PetscMPIInt    rank,size;
2898   PetscInt       *rowners_bs,dest,count,source;
2899   PetscScalar    *va;
2900   MatScalar      *ba;
2901   MPI_Status     stat;
2902 
2903   PetscFunctionBegin;
2904   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2905   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2906   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2907 
2908   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2909   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2910 
2911   bs  = A->rmap->bs;
2912   mbs = a->mbs;
2913   Mbs = a->Mbs;
2914   ba  = b->a;
2915   bi  = b->i;
2916   bj  = b->j;
2917 
2918   /* find ownerships */
2919   rowners_bs = A->rmap->range;
2920 
2921   /* each proc creates an array to be distributed */
2922   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2923   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2924 
2925   /* row_max for B */
2926   if (rank != size-1) {
2927     for (i=0; i<mbs; i++) {
2928       ncols = bi[1] - bi[0]; bi++;
2929       brow  = bs*i;
2930       for (j=0; j<ncols; j++) {
2931         bcol = bs*(*bj);
2932         for (kcol=0; kcol<bs; kcol++) {
2933           col  = bcol + kcol;                /* local col index */
2934           col += rowners_bs[rank+1];      /* global col index */
2935           for (krow=0; krow<bs; krow++) {
2936             atmp = PetscAbsScalar(*ba); ba++;
2937             row  = brow + krow;   /* local row index */
2938             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2939             if (work[col] < atmp) work[col] = atmp;
2940           }
2941         }
2942         bj++;
2943       }
2944     }
2945 
2946     /* send values to its owners */
2947     for (dest=rank+1; dest<size; dest++) {
2948       svalues = work + rowners_bs[dest];
2949       count   = rowners_bs[dest+1]-rowners_bs[dest];
2950       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2951     }
2952   }
2953 
2954   /* receive values */
2955   if (rank) {
2956     rvalues = work;
2957     count   = rowners_bs[rank+1]-rowners_bs[rank];
2958     for (source=0; source<rank; source++) {
2959       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2960       /* process values */
2961       for (i=0; i<count; i++) {
2962         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2963       }
2964     }
2965   }
2966 
2967   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2968   ierr = PetscFree(work);CHKERRQ(ierr);
2969   PetscFunctionReturn(0);
2970 }
2971 
2972 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2973 {
2974   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2975   PetscErrorCode    ierr;
2976   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2977   PetscScalar       *x,*ptr,*from;
2978   Vec               bb1;
2979   const PetscScalar *b;
2980 
2981   PetscFunctionBegin;
2982   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2983   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2984 
2985   if (flag == SOR_APPLY_UPPER) {
2986     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2987     PetscFunctionReturn(0);
2988   }
2989 
2990   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2991     if (flag & SOR_ZERO_INITIAL_GUESS) {
2992       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2993       its--;
2994     }
2995 
2996     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2997     while (its--) {
2998 
2999       /* lower triangular part: slvec0b = - B^T*xx */
3000       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3001 
3002       /* copy xx into slvec0a */
3003       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3004       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3005       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3006       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3007 
3008       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
3009 
3010       /* copy bb into slvec1a */
3011       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3012       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3013       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3014       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3015 
3016       /* set slvec1b = 0 */
3017       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3018 
3019       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3020       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3021       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3022       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3023 
3024       /* upper triangular part: bb1 = bb1 - B*x */
3025       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
3026 
3027       /* local diagonal sweep */
3028       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3029     }
3030     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3031   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3032     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3033   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3034     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3035   } else if (flag & SOR_EISENSTAT) {
3036     Vec               xx1;
3037     PetscBool         hasop;
3038     const PetscScalar *diag;
3039     PetscScalar       *sl,scale = (omega - 2.0)/omega;
3040     PetscInt          i,n;
3041 
3042     if (!mat->xx1) {
3043       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
3044       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
3045     }
3046     xx1 = mat->xx1;
3047     bb1 = mat->bb1;
3048 
3049     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
3050 
3051     if (!mat->diag) {
3052       /* this is wrong for same matrix with new nonzero values */
3053       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
3054       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
3055     }
3056     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
3057 
3058     if (hasop) {
3059       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
3060       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3061     } else {
3062       /*
3063           These two lines are replaced by code that may be a bit faster for a good compiler
3064       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
3065       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3066       */
3067       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3068       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3069       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3070       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3071       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
3072       if (omega == 1.0) {
3073         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
3074         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
3075       } else {
3076         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
3077         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
3078       }
3079       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3080       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3081       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3082       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3083     }
3084 
3085     /* multiply off-diagonal portion of matrix */
3086     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3087     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3088     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
3089     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3090     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3091     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
3092     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3093     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3094     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3095     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
3096 
3097     /* local sweep */
3098     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
3099     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
3100   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3101   PetscFunctionReturn(0);
3102 }
3103 
3104 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3105 {
3106   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
3107   PetscErrorCode ierr;
3108   Vec            lvec1,bb1;
3109 
3110   PetscFunctionBegin;
3111   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3112   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3113 
3114   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3115     if (flag & SOR_ZERO_INITIAL_GUESS) {
3116       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3117       its--;
3118     }
3119 
3120     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
3121     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3122     while (its--) {
3123       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3124 
3125       /* lower diagonal part: bb1 = bb - B^T*xx */
3126       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3127       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3128 
3129       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3130       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3131       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3132 
3133       /* upper diagonal part: bb1 = bb1 - B*x */
3134       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3135       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3136 
3137       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3138 
3139       /* diagonal sweep */
3140       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3141     }
3142     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3143     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3144   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3145   PetscFunctionReturn(0);
3146 }
3147 
3148 /*@
3149      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
3150          CSR format the local rows.
3151 
3152    Collective on MPI_Comm
3153 
3154    Input Parameters:
3155 +  comm - MPI communicator
3156 .  bs - the block size, only a block size of 1 is supported
3157 .  m - number of local rows (Cannot be PETSC_DECIDE)
3158 .  n - This value should be the same as the local size used in creating the
3159        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3160        calculated if N is given) For square matrices n is almost always m.
3161 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3162 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3163 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
3164 .   j - column indices
3165 -   a - matrix values
3166 
3167    Output Parameter:
3168 .   mat - the matrix
3169 
3170    Level: intermediate
3171 
3172    Notes:
3173        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3174      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3175      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3176 
3177        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3178 
3179 .keywords: matrix, aij, compressed row, sparse, parallel
3180 
3181 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3182           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3183 @*/
3184 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3185 {
3186   PetscErrorCode ierr;
3187 
3188 
3189   PetscFunctionBegin;
3190   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3191   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3192   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3193   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3194   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3195   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3196   PetscFunctionReturn(0);
3197 }
3198 
3199 
3200 /*@C
3201    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
3202    (the default parallel PETSc format).
3203 
3204    Collective on MPI_Comm
3205 
3206    Input Parameters:
3207 +  B - the matrix
3208 .  bs - the block size
3209 .  i - the indices into j for the start of each local row (starts with zero)
3210 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3211 -  v - optional values in the matrix
3212 
3213    Level: developer
3214 
3215 .keywords: matrix, aij, compressed row, sparse, parallel
3216 
3217 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3218 @*/
3219 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3220 {
3221   PetscErrorCode ierr;
3222 
3223   PetscFunctionBegin;
3224   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3225   PetscFunctionReturn(0);
3226 }
3227 
3228 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3229 {
3230   PetscErrorCode ierr;
3231   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3232   PetscInt       *indx;
3233   PetscScalar    *values;
3234 
3235   PetscFunctionBegin;
3236   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3237   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3238     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3239     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3240     PetscInt       *bindx,rmax=a->rmax,j;
3241 
3242     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3243     mbs = m/bs; Nbs = N/cbs;
3244     if (n == PETSC_DECIDE) {
3245       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3246     }
3247     /* Check sum(n) = Nbs */
3248     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3249     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
3250 
3251     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3252     rstart -= mbs;
3253 
3254     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3255     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3256     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3257     for (i=0; i<mbs; i++) {
3258       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3259       nnz = nnz/bs;
3260       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3261       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3262       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3263     }
3264     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3265     ierr = PetscFree(bindx);CHKERRQ(ierr);
3266 
3267     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3268     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3269     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3270     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3271     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3272     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3273   }
3274 
3275   /* numeric phase */
3276   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3277   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3278 
3279   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3280   for (i=0; i<m; i++) {
3281     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3282     Ii   = i + rstart;
3283     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3284     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3285   }
3286   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3287   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3288   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3289   PetscFunctionReturn(0);
3290 }
3291