xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision fbfcfee5110779e3d6a9465ca0a2e0f9a1a6e5e3)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
11 {
12   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
17   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
18   PetscFunctionReturn(0);
19 }
20 
21 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
22 {
23   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
28   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
29   PetscFunctionReturn(0);
30 }
31 
32 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
33   { \
34  \
35     brow = row/bs;  \
36     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
37     rmax = aimax[brow]; nrow = ailen[brow]; \
38     bcol = col/bs; \
39     ridx = row % bs; cidx = col % bs; \
40     low  = 0; high = nrow; \
41     while (high-low > 3) { \
42       t = (low+high)/2; \
43       if (rp[t] > bcol) high = t; \
44       else              low  = t; \
45     } \
46     for (_i=low; _i<high; _i++) { \
47       if (rp[_i] > bcol) break; \
48       if (rp[_i] == bcol) { \
49         bap = ap + bs2*_i + bs*cidx + ridx; \
50         if (addv == ADD_VALUES) *bap += value;  \
51         else                    *bap  = value;  \
52         goto a_noinsert; \
53       } \
54     } \
55     if (a->nonew == 1) goto a_noinsert; \
56     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
57     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
58     N = nrow++ - 1;  \
59     /* shift up all the later entries in this row */ \
60     for (ii=N; ii>=_i; ii--) { \
61       rp[ii+1] = rp[ii]; \
62       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
63     } \
64     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
65     rp[_i]                      = bcol;  \
66     ap[bs2*_i + bs*cidx + ridx] = value;  \
67     A->nonzerostate++;\
68 a_noinsert:; \
69     ailen[brow] = nrow; \
70   }
71 
72 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
73   { \
74     brow = row/bs;  \
75     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
76     rmax = bimax[brow]; nrow = bilen[brow]; \
77     bcol = col/bs; \
78     ridx = row % bs; cidx = col % bs; \
79     low  = 0; high = nrow; \
80     while (high-low > 3) { \
81       t = (low+high)/2; \
82       if (rp[t] > bcol) high = t; \
83       else              low  = t; \
84     } \
85     for (_i=low; _i<high; _i++) { \
86       if (rp[_i] > bcol) break; \
87       if (rp[_i] == bcol) { \
88         bap = ap + bs2*_i + bs*cidx + ridx; \
89         if (addv == ADD_VALUES) *bap += value;  \
90         else                    *bap  = value;  \
91         goto b_noinsert; \
92       } \
93     } \
94     if (b->nonew == 1) goto b_noinsert; \
95     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
96     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
97     N = nrow++ - 1;  \
98     /* shift up all the later entries in this row */ \
99     for (ii=N; ii>=_i; ii--) { \
100       rp[ii+1] = rp[ii]; \
101       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
102     } \
103     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
104     rp[_i]                      = bcol;  \
105     ap[bs2*_i + bs*cidx + ridx] = value;  \
106     B->nonzerostate++;\
107 b_noinsert:; \
108     bilen[brow] = nrow; \
109   }
110 
111 /* Only add/insert a(i,j) with i<=j (blocks).
112    Any a(i,j) with i>j input by user is ingored.
113 */
114 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
115 {
116   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
117   MatScalar      value;
118   PetscBool      roworiented = baij->roworiented;
119   PetscErrorCode ierr;
120   PetscInt       i,j,row,col;
121   PetscInt       rstart_orig=mat->rmap->rstart;
122   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
123   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
124 
125   /* Some Variables required in the macro */
126   Mat          A     = baij->A;
127   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
128   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
129   MatScalar    *aa   =a->a;
130 
131   Mat         B     = baij->B;
132   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
133   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
134   MatScalar   *ba   =b->a;
135 
136   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
137   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
138   MatScalar *ap,*bap;
139 
140   /* for stash */
141   PetscInt  n_loc, *in_loc = NULL;
142   MatScalar *v_loc = NULL;
143 
144   PetscFunctionBegin;
145   if (!baij->donotstash) {
146     if (n > baij->n_loc) {
147       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
148       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
149       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
150       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
151 
152       baij->n_loc = n;
153     }
154     in_loc = baij->in_loc;
155     v_loc  = baij->v_loc;
156   }
157 
158   for (i=0; i<m; i++) {
159     if (im[i] < 0) continue;
160 #if defined(PETSC_USE_DEBUG)
161     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
162 #endif
163     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
164       row = im[i] - rstart_orig;              /* local row index */
165       for (j=0; j<n; j++) {
166         if (im[i]/bs > in[j]/bs) {
167           if (a->ignore_ltriangular) {
168             continue;    /* ignore lower triangular blocks */
169           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
170         }
171         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
172           col  = in[j] - cstart_orig;         /* local col index */
173           brow = row/bs; bcol = col/bs;
174           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
175           if (roworiented) value = v[i*n+j];
176           else             value = v[i+j*m];
177           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
178           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
179         } else if (in[j] < 0) continue;
180 #if defined(PETSC_USE_DEBUG)
181         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
182 #endif
183         else {  /* off-diag entry (B) */
184           if (mat->was_assembled) {
185             if (!baij->colmap) {
186               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
187             }
188 #if defined(PETSC_USE_CTABLE)
189             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
190             col  = col - 1;
191 #else
192             col = baij->colmap[in[j]/bs] - 1;
193 #endif
194             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
195               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
196               col  =  in[j];
197               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
198               B    = baij->B;
199               b    = (Mat_SeqBAIJ*)(B)->data;
200               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
201               ba   = b->a;
202             } else col += in[j]%bs;
203           } else col = in[j];
204           if (roworiented) value = v[i*n+j];
205           else             value = v[i+j*m];
206           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
207           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
208         }
209       }
210     } else {  /* off processor entry */
211       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
212       if (!baij->donotstash) {
213         mat->assembled = PETSC_FALSE;
214         n_loc          = 0;
215         for (j=0; j<n; j++) {
216           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
217           in_loc[n_loc] = in[j];
218           if (roworiented) {
219             v_loc[n_loc] = v[i*n+j];
220           } else {
221             v_loc[n_loc] = v[j*m+i];
222           }
223           n_loc++;
224         }
225         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
226       }
227     }
228   }
229   PetscFunctionReturn(0);
230 }
231 
232 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
233 {
234   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
235   PetscErrorCode    ierr;
236   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
237   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
238   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
239   PetscBool         roworiented=a->roworiented;
240   const PetscScalar *value     = v;
241   MatScalar         *ap,*aa = a->a,*bap;
242 
243   PetscFunctionBegin;
244   if (col < row) {
245     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
246     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
247   }
248   rp   = aj + ai[row];
249   ap   = aa + bs2*ai[row];
250   rmax = imax[row];
251   nrow = ailen[row];
252   value = v;
253   low   = 0;
254   high  = nrow;
255 
256   while (high-low > 7) {
257     t = (low+high)/2;
258     if (rp[t] > col) high = t;
259     else             low  = t;
260   }
261   for (i=low; i<high; i++) {
262     if (rp[i] > col) break;
263     if (rp[i] == col) {
264       bap = ap +  bs2*i;
265       if (roworiented) {
266         if (is == ADD_VALUES) {
267           for (ii=0; ii<bs; ii++) {
268             for (jj=ii; jj<bs2; jj+=bs) {
269               bap[jj] += *value++;
270             }
271           }
272         } else {
273           for (ii=0; ii<bs; ii++) {
274             for (jj=ii; jj<bs2; jj+=bs) {
275               bap[jj] = *value++;
276             }
277           }
278         }
279       } else {
280         if (is == ADD_VALUES) {
281           for (ii=0; ii<bs; ii++) {
282             for (jj=0; jj<bs; jj++) {
283               *bap++ += *value++;
284             }
285           }
286         } else {
287           for (ii=0; ii<bs; ii++) {
288             for (jj=0; jj<bs; jj++) {
289               *bap++  = *value++;
290             }
291           }
292         }
293       }
294       goto noinsert2;
295     }
296   }
297   if (nonew == 1) goto noinsert2;
298   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
299   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
300   N = nrow++ - 1; high++;
301   /* shift up all the later entries in this row */
302   for (ii=N; ii>=i; ii--) {
303     rp[ii+1] = rp[ii];
304     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
305   }
306   if (N >= i) {
307     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
308   }
309   rp[i] = col;
310   bap   = ap +  bs2*i;
311   if (roworiented) {
312     for (ii=0; ii<bs; ii++) {
313       for (jj=ii; jj<bs2; jj+=bs) {
314         bap[jj] = *value++;
315       }
316     }
317   } else {
318     for (ii=0; ii<bs; ii++) {
319       for (jj=0; jj<bs; jj++) {
320         *bap++ = *value++;
321       }
322     }
323   }
324   noinsert2:;
325   ailen[row] = nrow;
326   PetscFunctionReturn(0);
327 }
328 
329 /*
330    This routine is exactly duplicated in mpibaij.c
331 */
332 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
333 {
334   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
335   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
336   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
337   PetscErrorCode    ierr;
338   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
339   PetscBool         roworiented=a->roworiented;
340   const PetscScalar *value     = v;
341   MatScalar         *ap,*aa = a->a,*bap;
342 
343   PetscFunctionBegin;
344   rp   = aj + ai[row];
345   ap   = aa + bs2*ai[row];
346   rmax = imax[row];
347   nrow = ailen[row];
348   low  = 0;
349   high = nrow;
350   value = v;
351   while (high-low > 7) {
352     t = (low+high)/2;
353     if (rp[t] > col) high = t;
354     else             low  = t;
355   }
356   for (i=low; i<high; i++) {
357     if (rp[i] > col) break;
358     if (rp[i] == col) {
359       bap = ap +  bs2*i;
360       if (roworiented) {
361         if (is == ADD_VALUES) {
362           for (ii=0; ii<bs; ii++) {
363             for (jj=ii; jj<bs2; jj+=bs) {
364               bap[jj] += *value++;
365             }
366           }
367         } else {
368           for (ii=0; ii<bs; ii++) {
369             for (jj=ii; jj<bs2; jj+=bs) {
370               bap[jj] = *value++;
371             }
372           }
373         }
374       } else {
375         if (is == ADD_VALUES) {
376           for (ii=0; ii<bs; ii++,value+=bs) {
377             for (jj=0; jj<bs; jj++) {
378               bap[jj] += value[jj];
379             }
380             bap += bs;
381           }
382         } else {
383           for (ii=0; ii<bs; ii++,value+=bs) {
384             for (jj=0; jj<bs; jj++) {
385               bap[jj]  = value[jj];
386             }
387             bap += bs;
388           }
389         }
390       }
391       goto noinsert2;
392     }
393   }
394   if (nonew == 1) goto noinsert2;
395   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
396   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
397   N = nrow++ - 1; high++;
398   /* shift up all the later entries in this row */
399   for (ii=N; ii>=i; ii--) {
400     rp[ii+1] = rp[ii];
401     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
402   }
403   if (N >= i) {
404     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
405   }
406   rp[i] = col;
407   bap   = ap +  bs2*i;
408   if (roworiented) {
409     for (ii=0; ii<bs; ii++) {
410       for (jj=ii; jj<bs2; jj+=bs) {
411         bap[jj] = *value++;
412       }
413     }
414   } else {
415     for (ii=0; ii<bs; ii++) {
416       for (jj=0; jj<bs; jj++) {
417         *bap++ = *value++;
418       }
419     }
420   }
421   noinsert2:;
422   ailen[row] = nrow;
423   PetscFunctionReturn(0);
424 }
425 
426 /*
427     This routine could be optimized by removing the need for the block copy below and passing stride information
428   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
429 */
430 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
431 {
432   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
433   const MatScalar *value;
434   MatScalar       *barray     =baij->barray;
435   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
436   PetscErrorCode  ierr;
437   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
438   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
439   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
440 
441   PetscFunctionBegin;
442   if (!barray) {
443     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
444     baij->barray = barray;
445   }
446 
447   if (roworiented) {
448     stepval = (n-1)*bs;
449   } else {
450     stepval = (m-1)*bs;
451   }
452   for (i=0; i<m; i++) {
453     if (im[i] < 0) continue;
454 #if defined(PETSC_USE_DEBUG)
455     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
456 #endif
457     if (im[i] >= rstart && im[i] < rend) {
458       row = im[i] - rstart;
459       for (j=0; j<n; j++) {
460         if (im[i] > in[j]) {
461           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
462           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
463         }
464         /* If NumCol = 1 then a copy is not required */
465         if ((roworiented) && (n == 1)) {
466           barray = (MatScalar*) v + i*bs2;
467         } else if ((!roworiented) && (m == 1)) {
468           barray = (MatScalar*) v + j*bs2;
469         } else { /* Here a copy is required */
470           if (roworiented) {
471             value = v + i*(stepval+bs)*bs + j*bs;
472           } else {
473             value = v + j*(stepval+bs)*bs + i*bs;
474           }
475           for (ii=0; ii<bs; ii++,value+=stepval) {
476             for (jj=0; jj<bs; jj++) {
477               *barray++ = *value++;
478             }
479           }
480           barray -=bs2;
481         }
482 
483         if (in[j] >= cstart && in[j] < cend) {
484           col  = in[j] - cstart;
485           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
486         } else if (in[j] < 0) continue;
487 #if defined(PETSC_USE_DEBUG)
488         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
489 #endif
490         else {
491           if (mat->was_assembled) {
492             if (!baij->colmap) {
493               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
494             }
495 
496 #if defined(PETSC_USE_DEBUG)
497 #if defined(PETSC_USE_CTABLE)
498             { PetscInt data;
499               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
500               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
501             }
502 #else
503             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
504 #endif
505 #endif
506 #if defined(PETSC_USE_CTABLE)
507             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
508             col  = (col - 1)/bs;
509 #else
510             col = (baij->colmap[in[j]] - 1)/bs;
511 #endif
512             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
513               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
514               col  = in[j];
515             }
516           } else col = in[j];
517           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
518         }
519       }
520     } else {
521       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
522       if (!baij->donotstash) {
523         if (roworiented) {
524           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
525         } else {
526           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
527         }
528       }
529     }
530   }
531   PetscFunctionReturn(0);
532 }
533 
534 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
535 {
536   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
537   PetscErrorCode ierr;
538   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
539   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
540 
541   PetscFunctionBegin;
542   for (i=0; i<m; i++) {
543     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
544     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
545     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
546       row = idxm[i] - bsrstart;
547       for (j=0; j<n; j++) {
548         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
549         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
550         if (idxn[j] >= bscstart && idxn[j] < bscend) {
551           col  = idxn[j] - bscstart;
552           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
553         } else {
554           if (!baij->colmap) {
555             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
556           }
557 #if defined(PETSC_USE_CTABLE)
558           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
559           data--;
560 #else
561           data = baij->colmap[idxn[j]/bs]-1;
562 #endif
563           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
564           else {
565             col  = data + idxn[j]%bs;
566             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567           }
568         }
569       }
570     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
571   }
572   PetscFunctionReturn(0);
573 }
574 
575 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
576 {
577   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
578   PetscErrorCode ierr;
579   PetscReal      sum[2],*lnorm2;
580 
581   PetscFunctionBegin;
582   if (baij->size == 1) {
583     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
584   } else {
585     if (type == NORM_FROBENIUS) {
586       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
587       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
588       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
589       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
590       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
591       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
592       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
593       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
594     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
595       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
596       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
597       PetscReal    *rsum,*rsum2,vabs;
598       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
599       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
600       MatScalar    *v;
601 
602       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
603       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
604       /* Amat */
605       v = amat->a; jj = amat->j;
606       for (brow=0; brow<mbs; brow++) {
607         grow = bs*(rstart + brow);
608         nz   = amat->i[brow+1] - amat->i[brow];
609         for (bcol=0; bcol<nz; bcol++) {
610           gcol = bs*(rstart + *jj); jj++;
611           for (col=0; col<bs; col++) {
612             for (row=0; row<bs; row++) {
613               vabs            = PetscAbsScalar(*v); v++;
614               rsum[gcol+col] += vabs;
615               /* non-diagonal block */
616               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
617             }
618           }
619         }
620         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
621       }
622       /* Bmat */
623       v = bmat->a; jj = bmat->j;
624       for (brow=0; brow<mbs; brow++) {
625         grow = bs*(rstart + brow);
626         nz = bmat->i[brow+1] - bmat->i[brow];
627         for (bcol=0; bcol<nz; bcol++) {
628           gcol = bs*garray[*jj]; jj++;
629           for (col=0; col<bs; col++) {
630             for (row=0; row<bs; row++) {
631               vabs            = PetscAbsScalar(*v); v++;
632               rsum[gcol+col] += vabs;
633               rsum[grow+row] += vabs;
634             }
635           }
636         }
637         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
638       }
639       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640       *norm = 0.0;
641       for (col=0; col<mat->cmap->N; col++) {
642         if (rsum2[col] > *norm) *norm = rsum2[col];
643       }
644       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
645     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
646   }
647   PetscFunctionReturn(0);
648 }
649 
650 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
651 {
652   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
653   PetscErrorCode ierr;
654   PetscInt       nstash,reallocs;
655 
656   PetscFunctionBegin;
657   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
658 
659   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
660   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
664   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
665   PetscFunctionReturn(0);
666 }
667 
668 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
671   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
672   PetscErrorCode ierr;
673   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
674   PetscInt       *row,*col;
675   PetscBool      other_disassembled;
676   PetscMPIInt    n;
677   PetscBool      r1,r2,r3;
678   MatScalar      *val;
679 
680   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
681   PetscFunctionBegin;
682   if (!baij->donotstash &&  !mat->nooffprocentries) {
683     while (1) {
684       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
685       if (!flg) break;
686 
687       for (i=0; i<n;) {
688         /* Now identify the consecutive vals belonging to the same row */
689         for (j=i,rstart=row[j]; j<n; j++) {
690           if (row[j] != rstart) break;
691         }
692         if (j < n) ncols = j-i;
693         else       ncols = n-i;
694         /* Now assemble all these values with a single function call */
695         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
696         i    = j;
697       }
698     }
699     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
700     /* Now process the block-stash. Since the values are stashed column-oriented,
701        set the roworiented flag to column oriented, and after MatSetValues()
702        restore the original flags */
703     r1 = baij->roworiented;
704     r2 = a->roworiented;
705     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
706 
707     baij->roworiented = PETSC_FALSE;
708     a->roworiented    = PETSC_FALSE;
709 
710     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
711     while (1) {
712       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
713       if (!flg) break;
714 
715       for (i=0; i<n;) {
716         /* Now identify the consecutive vals belonging to the same row */
717         for (j=i,rstart=row[j]; j<n; j++) {
718           if (row[j] != rstart) break;
719         }
720         if (j < n) ncols = j-i;
721         else       ncols = n-i;
722         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
723         i    = j;
724       }
725     }
726     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
727 
728     baij->roworiented = r1;
729     a->roworiented    = r2;
730 
731     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
732   }
733 
734   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
735   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
736 
737   /* determine if any processor has disassembled, if so we must
738      also disassemble ourselfs, in order that we may reassemble. */
739   /*
740      if nonzero structure of submatrix B cannot change then we know that
741      no processor disassembled thus we can skip this stuff
742   */
743   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
744     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
745     if (mat->was_assembled && !other_disassembled) {
746       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
747     }
748   }
749 
750   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
751     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
752   }
753   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
754   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
755 
756   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
757 
758   baij->rowvalues = 0;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
762     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer);
769 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
770 #include <petscdraw.h>
771 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
772 {
773   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
774   PetscErrorCode    ierr;
775   PetscInt          bs   = mat->rmap->bs;
776   PetscMPIInt       rank = baij->rank;
777   PetscBool         iascii,isdraw;
778   PetscViewer       sviewer;
779   PetscViewerFormat format;
780 
781   PetscFunctionBegin;
782   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
783   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
784   if (iascii) {
785     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
786     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
787       MatInfo info;
788       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
789       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
790       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
791       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
792       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
793       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
794       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
795       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
796       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
797       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
798       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
799       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
800       PetscFunctionReturn(0);
801     } else if (format == PETSC_VIEWER_ASCII_INFO) {
802       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
803       PetscFunctionReturn(0);
804     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
805       PetscFunctionReturn(0);
806     }
807   }
808 
809   if (isdraw) {
810     PetscDraw draw;
811     PetscBool isnull;
812     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
813     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
814     if (isnull) PetscFunctionReturn(0);
815   }
816 
817   {
818     /* assemble the entire matrix onto first processor. */
819     Mat          A;
820     Mat_SeqSBAIJ *Aloc;
821     Mat_SeqBAIJ  *Bloc;
822     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
823     MatScalar    *a;
824     const char   *matname;
825 
826     /* Should this be the same type as mat? */
827     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
828     if (!rank) {
829       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
830     } else {
831       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
832     }
833     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
834     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
835     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
836     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
837 
838     /* copy over the A part */
839     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
840     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
841     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
842 
843     for (i=0; i<mbs; i++) {
844       rvals[0] = bs*(baij->rstartbs + i);
845       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
846       for (j=ai[i]; j<ai[i+1]; j++) {
847         col = (baij->cstartbs+aj[j])*bs;
848         for (k=0; k<bs; k++) {
849           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
850           col++;
851           a += bs;
852         }
853       }
854     }
855     /* copy over the B part */
856     Bloc = (Mat_SeqBAIJ*)baij->B->data;
857     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
858     for (i=0; i<mbs; i++) {
859 
860       rvals[0] = bs*(baij->rstartbs + i);
861       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
862       for (j=ai[i]; j<ai[i+1]; j++) {
863         col = baij->garray[aj[j]]*bs;
864         for (k=0; k<bs; k++) {
865           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
866           col++;
867           a += bs;
868         }
869       }
870     }
871     ierr = PetscFree(rvals);CHKERRQ(ierr);
872     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
873     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
874     /*
875        Everyone has to call to draw the matrix since the graphics waits are
876        synchronized across all processors that share the PetscDraw object
877     */
878     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
879     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
880     if (!rank) {
881       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
882       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
883     }
884     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
885     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
886     ierr = MatDestroy(&A);CHKERRQ(ierr);
887   }
888   PetscFunctionReturn(0);
889 }
890 
891 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
892 {
893   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
894   Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
895   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
896   PetscErrorCode ierr;
897   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
898   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
899   int            fd;
900   PetscScalar    *column_values;
901   FILE           *file;
902   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
903   PetscInt       message_count,flowcontrolcount;
904 
905   PetscFunctionBegin;
906   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
907   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
908   nz   = bs2*(A->nz + B->nz);
909   rlen = mat->rmap->n;
910   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
911   if (!rank) {
912     header[0] = MAT_FILE_CLASSID;
913     header[1] = mat->rmap->N;
914     header[2] = mat->cmap->N;
915 
916     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
917     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
918     /* get largest number of rows any processor has */
919     range = mat->rmap->range;
920     for (i=1; i<size; i++) {
921       rlen = PetscMax(rlen,range[i+1] - range[i]);
922     }
923   } else {
924     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
925   }
926 
927   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
928   /* compute lengths of each row  */
929   for (i=0; i<a->mbs; i++) {
930     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
931   }
932   /* store the row lengths to the file */
933   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
934   if (!rank) {
935     MPI_Status status;
936     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
937     rlen = (range[1] - range[0])/bs;
938     for (i=0; i<rlen; i++) {
939       for (j=0; j<bs; j++) {
940         row_lens[i*bs+j] = bs*crow_lens[i];
941       }
942     }
943     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
944     for (i=1; i<size; i++) {
945       rlen = (range[i+1] - range[i])/bs;
946       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
947       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
948       for (k=0; k<rlen; k++) {
949         for (j=0; j<bs; j++) {
950           row_lens[k*bs+j] = bs*crow_lens[k];
951         }
952       }
953       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
954     }
955     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
956     ierr = PetscFree(row_lens);CHKERRQ(ierr);
957   } else {
958     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
959     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
960     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
961   }
962   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
963 
964   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
965      information needed to make it for each row from a block row. This does require more communication but still not more than
966      the communication needed for the nonzero values  */
967   nzmax = nz; /*  space a largest processor needs */
968   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
969   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
970   cnt   = 0;
971   for (i=0; i<a->mbs; i++) {
972     pcnt = cnt;
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ((col = garray[B->j[j]]) > cstart) break;
975       for (l=0; l<bs; l++) {
976         column_indices[cnt++] = bs*col+l;
977       }
978     }
979     for (k=A->i[i]; k<A->i[i+1]; k++) {
980       for (l=0; l<bs; l++) {
981         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
982       }
983     }
984     for (; j<B->i[i+1]; j++) {
985       for (l=0; l<bs; l++) {
986         column_indices[cnt++] = bs*garray[B->j[j]]+l;
987       }
988     }
989     len = cnt - pcnt;
990     for (k=1; k<bs; k++) {
991       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
992       cnt += len;
993     }
994   }
995   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
996 
997   /* store the columns to the file */
998   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
999   if (!rank) {
1000     MPI_Status status;
1001     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1002     for (i=1; i<size; i++) {
1003       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1004       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1005       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1006       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1007     }
1008     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1009   } else {
1010     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1011     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1012     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1013     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1014   }
1015   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1016 
1017   /* load up the numerical values */
1018   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1019   cnt  = 0;
1020   for (i=0; i<a->mbs; i++) {
1021     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1022     for (j=B->i[i]; j<B->i[i+1]; j++) {
1023       if (garray[B->j[j]] > cstart) break;
1024       for (l=0; l<bs; l++) {
1025         for (ll=0; ll<bs; ll++) {
1026           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1027         }
1028       }
1029       cnt += bs;
1030     }
1031     for (k=A->i[i]; k<A->i[i+1]; k++) {
1032       for (l=0; l<bs; l++) {
1033         for (ll=0; ll<bs; ll++) {
1034           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1035         }
1036       }
1037       cnt += bs;
1038     }
1039     for (; j<B->i[i+1]; j++) {
1040       for (l=0; l<bs; l++) {
1041         for (ll=0; ll<bs; ll++) {
1042           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1043         }
1044       }
1045       cnt += bs;
1046     }
1047     cnt += (bs-1)*rlen;
1048   }
1049   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1050 
1051   /* store the column values to the file */
1052   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1053   if (!rank) {
1054     MPI_Status status;
1055     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1056     for (i=1; i<size; i++) {
1057       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1058       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1059       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1060       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1061     }
1062     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1063   } else {
1064     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1065     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1066     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1067     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1068   }
1069   ierr = PetscFree(column_values);CHKERRQ(ierr);
1070 
1071   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1072   if (file) {
1073     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1074   }
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1079 {
1080   PetscErrorCode ierr;
1081   PetscBool      iascii,isdraw,issocket,isbinary;
1082 
1083   PetscFunctionBegin;
1084   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1085   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1086   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1087   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1088   if (iascii || isdraw || issocket) {
1089     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1090   } else if (isbinary) {
1091     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1097 {
1098   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102 #if defined(PETSC_USE_LOG)
1103   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1104 #endif
1105   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1106   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1107   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1108   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1109 #if defined(PETSC_USE_CTABLE)
1110   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1111 #else
1112   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1113 #endif
1114   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1115   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1116   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1117   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1118   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1119   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1120   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1121   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1122   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1123   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1124   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1125   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1126   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1127   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1128   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1129 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1130   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1131 #endif
1132   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1133   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1134   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1135   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1136 
1137   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1140   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
1142 #if defined(PETSC_HAVE_ELEMENTAL)
1143   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1144 #endif
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1149 {
1150   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1151   PetscErrorCode    ierr;
1152   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1153   PetscScalar       *from;
1154   const PetscScalar *x;
1155 
1156   PetscFunctionBegin;
1157   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1158   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1159 
1160   /* diagonal part */
1161   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1162   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1163 
1164   /* subdiagonal part */
1165   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1166 
1167   /* copy x into the vec slvec0 */
1168   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1169   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1170 
1171   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1172   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1173   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1174 
1175   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1176   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1177   /* supperdiagonal part */
1178   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1183 {
1184   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1185   PetscErrorCode    ierr;
1186   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1187   PetscScalar       *from;
1188   const PetscScalar *x;
1189 
1190   PetscFunctionBegin;
1191   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1192   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1193 
1194   /* diagonal part */
1195   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1196   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1197 
1198   /* subdiagonal part */
1199   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1200 
1201   /* copy x into the vec slvec0 */
1202   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1203   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1204 
1205   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1206   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1207   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1208 
1209   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1210   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1211   /* supperdiagonal part */
1212   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1217 {
1218   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1219   PetscErrorCode ierr;
1220   PetscInt       nt;
1221 
1222   PetscFunctionBegin;
1223   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1224   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1225 
1226   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1227   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1228 
1229   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230   /* do diagonal part */
1231   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1232   /* do supperdiagonal part */
1233   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1234   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1235   /* do subdiagonal part */
1236   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1237   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1238   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1243 {
1244   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1245   PetscErrorCode    ierr;
1246   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1247   PetscScalar       *from,zero=0.0;
1248   const PetscScalar *x;
1249 
1250   PetscFunctionBegin;
1251   /*
1252   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1253   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1254   */
1255   /* diagonal part */
1256   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1257   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1258 
1259   /* subdiagonal part */
1260   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1261 
1262   /* copy x into the vec slvec0 */
1263   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1264   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1265   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1266   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1267 
1268   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1270   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1271 
1272   /* supperdiagonal part */
1273   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1278 {
1279   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1280   PetscErrorCode ierr;
1281 
1282   PetscFunctionBegin;
1283   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1284   /* do diagonal part */
1285   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1286   /* do supperdiagonal part */
1287   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1288   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1289 
1290   /* do subdiagonal part */
1291   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1292   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1293   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 /*
1298   This only works correctly for square matrices where the subblock A->A is the
1299    diagonal block
1300 */
1301 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1302 {
1303   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1304   PetscErrorCode ierr;
1305 
1306   PetscFunctionBegin;
1307   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1308   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1313 {
1314   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1315   PetscErrorCode ierr;
1316 
1317   PetscFunctionBegin;
1318   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1319   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1324 {
1325   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1326   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1327   PetscErrorCode ierr;
1328   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1329   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1330   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1331 
1332   PetscFunctionBegin;
1333   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1334   mat->getrowactive = PETSC_TRUE;
1335 
1336   if (!mat->rowvalues && (idx || v)) {
1337     /*
1338         allocate enough space to hold information from the longest row.
1339     */
1340     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1341     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1342     PetscInt     max = 1,mbs = mat->mbs,tmp;
1343     for (i=0; i<mbs; i++) {
1344       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1345       if (max < tmp) max = tmp;
1346     }
1347     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1348   }
1349 
1350   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1351   lrow = row - brstart;  /* local row index */
1352 
1353   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1354   if (!v)   {pvA = 0; pvB = 0;}
1355   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1356   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1357   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1358   nztot = nzA + nzB;
1359 
1360   cmap = mat->garray;
1361   if (v  || idx) {
1362     if (nztot) {
1363       /* Sort by increasing column numbers, assuming A and B already sorted */
1364       PetscInt imark = -1;
1365       if (v) {
1366         *v = v_p = mat->rowvalues;
1367         for (i=0; i<nzB; i++) {
1368           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1369           else break;
1370         }
1371         imark = i;
1372         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1373         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1374       }
1375       if (idx) {
1376         *idx = idx_p = mat->rowindices;
1377         if (imark > -1) {
1378           for (i=0; i<imark; i++) {
1379             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1380           }
1381         } else {
1382           for (i=0; i<nzB; i++) {
1383             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1384             else break;
1385           }
1386           imark = i;
1387         }
1388         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1389         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1390       }
1391     } else {
1392       if (idx) *idx = 0;
1393       if (v)   *v   = 0;
1394     }
1395   }
1396   *nz  = nztot;
1397   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1398   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1403 {
1404   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1405 
1406   PetscFunctionBegin;
1407   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1408   baij->getrowactive = PETSC_FALSE;
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1413 {
1414   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1415   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1416 
1417   PetscFunctionBegin;
1418   aA->getrow_utriangular = PETSC_TRUE;
1419   PetscFunctionReturn(0);
1420 }
1421 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1422 {
1423   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1424   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1425 
1426   PetscFunctionBegin;
1427   aA->getrow_utriangular = PETSC_FALSE;
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1432 {
1433   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1434   PetscErrorCode ierr;
1435 
1436   PetscFunctionBegin;
1437   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1438   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1443 {
1444   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1445   PetscErrorCode ierr;
1446 
1447   PetscFunctionBegin;
1448   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1449   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1450   PetscFunctionReturn(0);
1451 }
1452 
1453 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
1454    Input: isrow       - distributed(parallel),
1455           iscol_local - locally owned (seq)
1456 */
1457 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1458 {
1459   PetscErrorCode ierr;
1460   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1461   const PetscInt *ptr1,*ptr2;
1462 
1463   PetscFunctionBegin;
1464   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1465   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1466   if (sz1 > sz2) {
1467     *flg = PETSC_FALSE;
1468     PetscFunctionReturn(0);
1469   }
1470 
1471   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1472   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1473 
1474   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1475   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1476   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1477   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1478   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1479   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1480 
1481   nmatch=0;
1482   k     = 0;
1483   for (i=0; i<sz1; i++){
1484     for (j=k; j<sz2; j++){
1485       if (a1[i] == a2[j]) {
1486         k = j; nmatch++;
1487         break;
1488       }
1489     }
1490   }
1491   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1492   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1493   ierr = PetscFree(a1);CHKERRQ(ierr);
1494   ierr = PetscFree(a2);CHKERRQ(ierr);
1495   if (nmatch < sz1) {
1496     *flg = PETSC_FALSE;
1497   } else {
1498     *flg = PETSC_TRUE;
1499   }
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1504 {
1505   PetscErrorCode ierr;
1506   IS             iscol_local;
1507   PetscInt       csize;
1508   PetscBool      isequal;
1509 
1510   PetscFunctionBegin;
1511   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1512   if (call == MAT_REUSE_MATRIX) {
1513     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1514     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1515   } else {
1516     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1517     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1518     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1519   }
1520 
1521   /* now call MatGetSubMatrix_MPIBAIJ() */
1522   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1523   if (call == MAT_INITIAL_MATRIX) {
1524     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1525     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1526   }
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1531 {
1532   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1533   PetscErrorCode ierr;
1534 
1535   PetscFunctionBegin;
1536   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1537   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1542 {
1543   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1544   Mat            A  = a->A,B = a->B;
1545   PetscErrorCode ierr;
1546   PetscReal      isend[5],irecv[5];
1547 
1548   PetscFunctionBegin;
1549   info->block_size = (PetscReal)matin->rmap->bs;
1550 
1551   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1552 
1553   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1554   isend[3] = info->memory;  isend[4] = info->mallocs;
1555 
1556   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1557 
1558   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1559   isend[3] += info->memory;  isend[4] += info->mallocs;
1560   if (flag == MAT_LOCAL) {
1561     info->nz_used      = isend[0];
1562     info->nz_allocated = isend[1];
1563     info->nz_unneeded  = isend[2];
1564     info->memory       = isend[3];
1565     info->mallocs      = isend[4];
1566   } else if (flag == MAT_GLOBAL_MAX) {
1567     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1568 
1569     info->nz_used      = irecv[0];
1570     info->nz_allocated = irecv[1];
1571     info->nz_unneeded  = irecv[2];
1572     info->memory       = irecv[3];
1573     info->mallocs      = irecv[4];
1574   } else if (flag == MAT_GLOBAL_SUM) {
1575     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1576 
1577     info->nz_used      = irecv[0];
1578     info->nz_allocated = irecv[1];
1579     info->nz_unneeded  = irecv[2];
1580     info->memory       = irecv[3];
1581     info->mallocs      = irecv[4];
1582   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1583   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1584   info->fill_ratio_needed = 0;
1585   info->factor_mallocs    = 0;
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1590 {
1591   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1592   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1593   PetscErrorCode ierr;
1594 
1595   PetscFunctionBegin;
1596   switch (op) {
1597   case MAT_NEW_NONZERO_LOCATIONS:
1598   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1599   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1600   case MAT_KEEP_NONZERO_PATTERN:
1601   case MAT_SUBMAT_SINGLEIS:
1602   case MAT_NEW_NONZERO_LOCATION_ERR:
1603     MatCheckPreallocated(A,1);
1604     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1605     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1606     break;
1607   case MAT_ROW_ORIENTED:
1608     MatCheckPreallocated(A,1);
1609     a->roworiented = flg;
1610 
1611     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1612     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1613     break;
1614   case MAT_NEW_DIAGONALS:
1615     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1616     break;
1617   case MAT_IGNORE_OFF_PROC_ENTRIES:
1618     a->donotstash = flg;
1619     break;
1620   case MAT_USE_HASH_TABLE:
1621     a->ht_flag = flg;
1622     break;
1623   case MAT_HERMITIAN:
1624     MatCheckPreallocated(A,1);
1625     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1626     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1627 
1628     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1629     break;
1630   case MAT_SPD:
1631     A->spd_set = PETSC_TRUE;
1632     A->spd     = flg;
1633     if (flg) {
1634       A->symmetric                  = PETSC_TRUE;
1635       A->structurally_symmetric     = PETSC_TRUE;
1636       A->symmetric_set              = PETSC_TRUE;
1637       A->structurally_symmetric_set = PETSC_TRUE;
1638     }
1639     break;
1640   case MAT_SYMMETRIC:
1641     MatCheckPreallocated(A,1);
1642     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1643     break;
1644   case MAT_STRUCTURALLY_SYMMETRIC:
1645     MatCheckPreallocated(A,1);
1646     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1647     break;
1648   case MAT_SYMMETRY_ETERNAL:
1649     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1650     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1651     break;
1652   case MAT_IGNORE_LOWER_TRIANGULAR:
1653     aA->ignore_ltriangular = flg;
1654     break;
1655   case MAT_ERROR_LOWER_TRIANGULAR:
1656     aA->ignore_ltriangular = flg;
1657     break;
1658   case MAT_GETROW_UPPERTRIANGULAR:
1659     aA->getrow_utriangular = flg;
1660     break;
1661   default:
1662     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1663   }
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1668 {
1669   PetscErrorCode ierr;
1670 
1671   PetscFunctionBegin;
1672   if (MAT_INITIAL_MATRIX || *B != A) {
1673     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1674   }
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1679 {
1680   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1681   Mat            a     = baij->A, b=baij->B;
1682   PetscErrorCode ierr;
1683   PetscInt       nv,m,n;
1684   PetscBool      flg;
1685 
1686   PetscFunctionBegin;
1687   if (ll != rr) {
1688     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1689     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1690   }
1691   if (!ll) PetscFunctionReturn(0);
1692 
1693   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1694   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1695 
1696   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1697   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1698 
1699   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1700 
1701   /* left diagonalscale the off-diagonal part */
1702   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1703 
1704   /* scale the diagonal part */
1705   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1706 
1707   /* right diagonalscale the off-diagonal part */
1708   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1709   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1714 {
1715   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1716   PetscErrorCode ierr;
1717 
1718   PetscFunctionBegin;
1719   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1724 
1725 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1726 {
1727   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1728   Mat            a,b,c,d;
1729   PetscBool      flg;
1730   PetscErrorCode ierr;
1731 
1732   PetscFunctionBegin;
1733   a = matA->A; b = matA->B;
1734   c = matB->A; d = matB->B;
1735 
1736   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1737   if (flg) {
1738     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1739   }
1740   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1745 {
1746   PetscErrorCode ierr;
1747   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1748   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1749 
1750   PetscFunctionBegin;
1751   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1752   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1753     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1754     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1755     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1756   } else {
1757     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1758     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1759   }
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1764 {
1765   PetscErrorCode ierr;
1766 
1767   PetscFunctionBegin;
1768   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1773 {
1774   PetscErrorCode ierr;
1775   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1776   PetscBLASInt   bnz,one=1;
1777   Mat_SeqSBAIJ   *xa,*ya;
1778   Mat_SeqBAIJ    *xb,*yb;
1779 
1780   PetscFunctionBegin;
1781   if (str == SAME_NONZERO_PATTERN) {
1782     PetscScalar alpha = a;
1783     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1784     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1785     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1786     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1787     xb   = (Mat_SeqBAIJ*)xx->B->data;
1788     yb   = (Mat_SeqBAIJ*)yy->B->data;
1789     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1790     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1791     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1792   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1793     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1794     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1795     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1796   } else {
1797     Mat      B;
1798     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1799     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1800     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1801     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1802     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1803     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1804     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1805     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1806     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1807     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1808     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1809     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1810     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1811     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1812     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1813     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1814     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1815     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1816     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1817     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1818   }
1819   PetscFunctionReturn(0);
1820 }
1821 
1822 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1823 {
1824   PetscErrorCode ierr;
1825   PetscInt       i;
1826   PetscBool      flg;
1827 
1828   PetscFunctionBegin;
1829   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1830   for (i=0; i<n; i++) {
1831     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1832     if (!flg) {
1833       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1834     }
1835   }
1836   PetscFunctionReturn(0);
1837 }
1838 
1839 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1840 {
1841   PetscErrorCode ierr;
1842   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1843   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1844 
1845   PetscFunctionBegin;
1846   if (!Y->preallocated) {
1847     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1848   } else if (!aij->nz) {
1849     PetscInt nonew = aij->nonew;
1850     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1851     aij->nonew = nonew;
1852   }
1853   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1858 {
1859   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1860   PetscErrorCode ierr;
1861 
1862   PetscFunctionBegin;
1863   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1864   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1865   if (d) {
1866     PetscInt rstart;
1867     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1868     *d += rstart/A->rmap->bs;
1869 
1870   }
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1875 {
1876   PetscFunctionBegin;
1877   *a = ((Mat_MPISBAIJ*)A->data)->A;
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 /* -------------------------------------------------------------------*/
1882 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1883                                        MatGetRow_MPISBAIJ,
1884                                        MatRestoreRow_MPISBAIJ,
1885                                        MatMult_MPISBAIJ,
1886                                /*  4*/ MatMultAdd_MPISBAIJ,
1887                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1888                                        MatMultAdd_MPISBAIJ,
1889                                        0,
1890                                        0,
1891                                        0,
1892                                /* 10*/ 0,
1893                                        0,
1894                                        0,
1895                                        MatSOR_MPISBAIJ,
1896                                        MatTranspose_MPISBAIJ,
1897                                /* 15*/ MatGetInfo_MPISBAIJ,
1898                                        MatEqual_MPISBAIJ,
1899                                        MatGetDiagonal_MPISBAIJ,
1900                                        MatDiagonalScale_MPISBAIJ,
1901                                        MatNorm_MPISBAIJ,
1902                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1903                                        MatAssemblyEnd_MPISBAIJ,
1904                                        MatSetOption_MPISBAIJ,
1905                                        MatZeroEntries_MPISBAIJ,
1906                                /* 24*/ 0,
1907                                        0,
1908                                        0,
1909                                        0,
1910                                        0,
1911                                /* 29*/ MatSetUp_MPISBAIJ,
1912                                        0,
1913                                        0,
1914                                        MatGetDiagonalBlock_MPISBAIJ,
1915                                        0,
1916                                /* 34*/ MatDuplicate_MPISBAIJ,
1917                                        0,
1918                                        0,
1919                                        0,
1920                                        0,
1921                                /* 39*/ MatAXPY_MPISBAIJ,
1922                                        MatGetSubMatrices_MPISBAIJ,
1923                                        MatIncreaseOverlap_MPISBAIJ,
1924                                        MatGetValues_MPISBAIJ,
1925                                        MatCopy_MPISBAIJ,
1926                                /* 44*/ 0,
1927                                        MatScale_MPISBAIJ,
1928                                        MatShift_MPISBAIJ,
1929                                        0,
1930                                        0,
1931                                /* 49*/ 0,
1932                                        0,
1933                                        0,
1934                                        0,
1935                                        0,
1936                                /* 54*/ 0,
1937                                        0,
1938                                        MatSetUnfactored_MPISBAIJ,
1939                                        0,
1940                                        MatSetValuesBlocked_MPISBAIJ,
1941                                /* 59*/ MatGetSubMatrix_MPISBAIJ,
1942                                        0,
1943                                        0,
1944                                        0,
1945                                        0,
1946                                /* 64*/ 0,
1947                                        0,
1948                                        0,
1949                                        0,
1950                                        0,
1951                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1952                                        0,
1953                                        0,
1954                                        0,
1955                                        0,
1956                                /* 74*/ 0,
1957                                        0,
1958                                        0,
1959                                        0,
1960                                        0,
1961                                /* 79*/ 0,
1962                                        0,
1963                                        0,
1964                                        0,
1965                                        MatLoad_MPISBAIJ,
1966                                /* 84*/ 0,
1967                                        0,
1968                                        0,
1969                                        0,
1970                                        0,
1971                                /* 89*/ 0,
1972                                        0,
1973                                        0,
1974                                        0,
1975                                        0,
1976                                /* 94*/ 0,
1977                                        0,
1978                                        0,
1979                                        0,
1980                                        0,
1981                                /* 99*/ 0,
1982                                        0,
1983                                        0,
1984                                        0,
1985                                        0,
1986                                /*104*/ 0,
1987                                        MatRealPart_MPISBAIJ,
1988                                        MatImaginaryPart_MPISBAIJ,
1989                                        MatGetRowUpperTriangular_MPISBAIJ,
1990                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1991                                /*109*/ 0,
1992                                        0,
1993                                        0,
1994                                        0,
1995                                        MatMissingDiagonal_MPISBAIJ,
1996                                /*114*/ 0,
1997                                        0,
1998                                        0,
1999                                        0,
2000                                        0,
2001                                /*119*/ 0,
2002                                        0,
2003                                        0,
2004                                        0,
2005                                        0,
2006                                /*124*/ 0,
2007                                        0,
2008                                        0,
2009                                        0,
2010                                        0,
2011                                /*129*/ 0,
2012                                        0,
2013                                        0,
2014                                        0,
2015                                        0,
2016                                /*134*/ 0,
2017                                        0,
2018                                        0,
2019                                        0,
2020                                        0,
2021                                /*139*/ MatSetBlockSizes_Default,
2022                                        0,
2023                                        0,
2024                                        0,
2025                                        0,
2026                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2027 };
2028 
2029 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2030 {
2031   Mat_MPISBAIJ   *b;
2032   PetscErrorCode ierr;
2033   PetscInt       i,mbs,Mbs;
2034 
2035   PetscFunctionBegin;
2036   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2037   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2038   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2039   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2040 
2041   b   = (Mat_MPISBAIJ*)B->data;
2042   mbs = B->rmap->n/bs;
2043   Mbs = B->rmap->N/bs;
2044   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
2045 
2046   B->rmap->bs = bs;
2047   b->bs2      = bs*bs;
2048   b->mbs      = mbs;
2049   b->Mbs      = Mbs;
2050   b->nbs      = B->cmap->n/bs;
2051   b->Nbs      = B->cmap->N/bs;
2052 
2053   for (i=0; i<=b->size; i++) {
2054     b->rangebs[i] = B->rmap->range[i]/bs;
2055   }
2056   b->rstartbs = B->rmap->rstart/bs;
2057   b->rendbs   = B->rmap->rend/bs;
2058 
2059   b->cstartbs = B->cmap->rstart/bs;
2060   b->cendbs   = B->cmap->rend/bs;
2061 
2062 #if defined(PETSC_USE_CTABLE)
2063   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2064 #else
2065   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2066 #endif
2067   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2068   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2069   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2070   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2071   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2072   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2073   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2074   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2075   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2076 
2077   /* Because the B will have been resized we simply destroy it and create a new one each time */
2078   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2079   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2080   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2081   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2082   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2083 
2084   if (!B->preallocated) {
2085     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2086     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2087     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2088     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2089     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2090   }
2091 
2092   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2093   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2094 
2095   B->preallocated  = PETSC_TRUE;
2096   B->was_assembled = PETSC_FALSE;
2097   B->assembled     = PETSC_FALSE;
2098   PetscFunctionReturn(0);
2099 }
2100 
2101 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2102 {
2103   PetscInt       m,rstart,cstart,cend;
2104   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2105   const PetscInt *JJ    =0;
2106   PetscScalar    *values=0;
2107   PetscErrorCode ierr;
2108 
2109   PetscFunctionBegin;
2110   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2111   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2112   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2113   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2114   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2115   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2116   m      = B->rmap->n/bs;
2117   rstart = B->rmap->rstart/bs;
2118   cstart = B->cmap->rstart/bs;
2119   cend   = B->cmap->rend/bs;
2120 
2121   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2122   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2123   for (i=0; i<m; i++) {
2124     nz = ii[i+1] - ii[i];
2125     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2126     nz_max = PetscMax(nz_max,nz);
2127     JJ     = jj + ii[i];
2128     for (j=0; j<nz; j++) {
2129       if (*JJ >= cstart) break;
2130       JJ++;
2131     }
2132     d = 0;
2133     for (; j<nz; j++) {
2134       if (*JJ++ >= cend) break;
2135       d++;
2136     }
2137     d_nnz[i] = d;
2138     o_nnz[i] = nz - d;
2139   }
2140   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2141   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2142 
2143   values = (PetscScalar*)V;
2144   if (!values) {
2145     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2146     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2147   }
2148   for (i=0; i<m; i++) {
2149     PetscInt          row    = i + rstart;
2150     PetscInt          ncols  = ii[i+1] - ii[i];
2151     const PetscInt    *icols = jj + ii[i];
2152     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2153     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2154   }
2155 
2156   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 /*MC
2164    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2165    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2166    the matrix is stored.
2167 
2168   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2169   can call MatSetOption(Mat, MAT_HERMITIAN);
2170 
2171    Options Database Keys:
2172 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2173 
2174   Level: beginner
2175 
2176 .seealso: MatCreateMPISBAIJ
2177 M*/
2178 
2179 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2180 
2181 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2182 {
2183   Mat_MPISBAIJ   *b;
2184   PetscErrorCode ierr;
2185   PetscBool      flg = PETSC_FALSE;
2186 
2187   PetscFunctionBegin;
2188   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2189   B->data = (void*)b;
2190   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2191 
2192   B->ops->destroy = MatDestroy_MPISBAIJ;
2193   B->ops->view    = MatView_MPISBAIJ;
2194   B->assembled    = PETSC_FALSE;
2195   B->insertmode   = NOT_SET_VALUES;
2196 
2197   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2198   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2199 
2200   /* build local table of row and column ownerships */
2201   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2202 
2203   /* build cache for off array entries formed */
2204   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2205 
2206   b->donotstash  = PETSC_FALSE;
2207   b->colmap      = NULL;
2208   b->garray      = NULL;
2209   b->roworiented = PETSC_TRUE;
2210 
2211   /* stuff used in block assembly */
2212   b->barray = 0;
2213 
2214   /* stuff used for matrix vector multiply */
2215   b->lvec    = 0;
2216   b->Mvctx   = 0;
2217   b->slvec0  = 0;
2218   b->slvec0b = 0;
2219   b->slvec1  = 0;
2220   b->slvec1a = 0;
2221   b->slvec1b = 0;
2222   b->sMvctx  = 0;
2223 
2224   /* stuff for MatGetRow() */
2225   b->rowindices   = 0;
2226   b->rowvalues    = 0;
2227   b->getrowactive = PETSC_FALSE;
2228 
2229   /* hash table stuff */
2230   b->ht           = 0;
2231   b->hd           = 0;
2232   b->ht_size      = 0;
2233   b->ht_flag      = PETSC_FALSE;
2234   b->ht_fact      = 0;
2235   b->ht_total_ct  = 0;
2236   b->ht_insert_ct = 0;
2237 
2238   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
2239   b->ijonly = PETSC_FALSE;
2240 
2241   b->in_loc = 0;
2242   b->v_loc  = 0;
2243   b->n_loc  = 0;
2244 
2245   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2246   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2247   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2248   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2249 #if defined(PETSC_HAVE_ELEMENTAL)
2250   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2251 #endif
2252 
2253   B->symmetric                  = PETSC_TRUE;
2254   B->structurally_symmetric     = PETSC_TRUE;
2255   B->symmetric_set              = PETSC_TRUE;
2256   B->structurally_symmetric_set = PETSC_TRUE;
2257 
2258   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2259   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2260   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2261   if (flg) {
2262     PetscReal fact = 1.39;
2263     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2264     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2265     if (fact <= 1.0) fact = 1.39;
2266     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2267     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2268   }
2269   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 /*MC
2274    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2275 
2276    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2277    and MATMPISBAIJ otherwise.
2278 
2279    Options Database Keys:
2280 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2281 
2282   Level: beginner
2283 
2284 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2285 M*/
2286 
2287 /*@C
2288    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2289    the user should preallocate the matrix storage by setting the parameters
2290    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2291    performance can be increased by more than a factor of 50.
2292 
2293    Collective on Mat
2294 
2295    Input Parameters:
2296 +  B - the matrix
2297 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2298           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2299 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2300            submatrix  (same for all local rows)
2301 .  d_nnz - array containing the number of block nonzeros in the various block rows
2302            in the upper triangular and diagonal part of the in diagonal portion of the local
2303            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2304            for the diagonal entry and set a value even if it is zero.
2305 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2306            submatrix (same for all local rows).
2307 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2308            off-diagonal portion of the local submatrix that is right of the diagonal
2309            (possibly different for each block row) or NULL.
2310 
2311 
2312    Options Database Keys:
2313 .   -mat_no_unroll - uses code that does not unroll the loops in the
2314                      block calculations (much slower)
2315 .   -mat_block_size - size of the blocks to use
2316 
2317    Notes:
2318 
2319    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2320    than it must be used on all processors that share the object for that argument.
2321 
2322    If the *_nnz parameter is given then the *_nz parameter is ignored
2323 
2324    Storage Information:
2325    For a square global matrix we define each processor's diagonal portion
2326    to be its local rows and the corresponding columns (a square submatrix);
2327    each processor's off-diagonal portion encompasses the remainder of the
2328    local matrix (a rectangular submatrix).
2329 
2330    The user can specify preallocated storage for the diagonal part of
2331    the local submatrix with either d_nz or d_nnz (not both).  Set
2332    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2333    memory allocation.  Likewise, specify preallocated storage for the
2334    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2335 
2336    You can call MatGetInfo() to get information on how effective the preallocation was;
2337    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2338    You can also run with the option -info and look for messages with the string
2339    malloc in them to see if additional memory allocation was needed.
2340 
2341    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2342    the figure below we depict these three local rows and all columns (0-11).
2343 
2344 .vb
2345            0 1 2 3 4 5 6 7 8 9 10 11
2346           --------------------------
2347    row 3  |. . . d d d o o o o  o  o
2348    row 4  |. . . d d d o o o o  o  o
2349    row 5  |. . . d d d o o o o  o  o
2350           --------------------------
2351 .ve
2352 
2353    Thus, any entries in the d locations are stored in the d (diagonal)
2354    submatrix, and any entries in the o locations are stored in the
2355    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2356    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2357 
2358    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2359    plus the diagonal part of the d matrix,
2360    and o_nz should indicate the number of block nonzeros per row in the o matrix
2361 
2362    In general, for PDE problems in which most nonzeros are near the diagonal,
2363    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2364    or you will get TERRIBLE performance; see the users' manual chapter on
2365    matrices.
2366 
2367    Level: intermediate
2368 
2369 .keywords: matrix, block, aij, compressed row, sparse, parallel
2370 
2371 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2372 @*/
2373 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2374 {
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2379   PetscValidType(B,1);
2380   PetscValidLogicalCollectiveInt(B,bs,2);
2381   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 /*@C
2386    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2387    (block compressed row).  For good matrix assembly performance
2388    the user should preallocate the matrix storage by setting the parameters
2389    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2390    performance can be increased by more than a factor of 50.
2391 
2392    Collective on MPI_Comm
2393 
2394    Input Parameters:
2395 +  comm - MPI communicator
2396 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2397           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2398 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2399            This value should be the same as the local size used in creating the
2400            y vector for the matrix-vector product y = Ax.
2401 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2402            This value should be the same as the local size used in creating the
2403            x vector for the matrix-vector product y = Ax.
2404 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2405 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2406 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2407            submatrix  (same for all local rows)
2408 .  d_nnz - array containing the number of block nonzeros in the various block rows
2409            in the upper triangular portion of the in diagonal portion of the local
2410            (possibly different for each block block row) or NULL.
2411            If you plan to factor the matrix you must leave room for the diagonal entry and
2412            set its value even if it is zero.
2413 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2414            submatrix (same for all local rows).
2415 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2416            off-diagonal portion of the local submatrix (possibly different for
2417            each block row) or NULL.
2418 
2419    Output Parameter:
2420 .  A - the matrix
2421 
2422    Options Database Keys:
2423 .   -mat_no_unroll - uses code that does not unroll the loops in the
2424                      block calculations (much slower)
2425 .   -mat_block_size - size of the blocks to use
2426 .   -mat_mpi - use the parallel matrix data structures even on one processor
2427                (defaults to using SeqBAIJ format on one processor)
2428 
2429    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2430    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2431    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2432 
2433    Notes:
2434    The number of rows and columns must be divisible by blocksize.
2435    This matrix type does not support complex Hermitian operation.
2436 
2437    The user MUST specify either the local or global matrix dimensions
2438    (possibly both).
2439 
2440    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2441    than it must be used on all processors that share the object for that argument.
2442 
2443    If the *_nnz parameter is given then the *_nz parameter is ignored
2444 
2445    Storage Information:
2446    For a square global matrix we define each processor's diagonal portion
2447    to be its local rows and the corresponding columns (a square submatrix);
2448    each processor's off-diagonal portion encompasses the remainder of the
2449    local matrix (a rectangular submatrix).
2450 
2451    The user can specify preallocated storage for the diagonal part of
2452    the local submatrix with either d_nz or d_nnz (not both).  Set
2453    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2454    memory allocation.  Likewise, specify preallocated storage for the
2455    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2456 
2457    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2458    the figure below we depict these three local rows and all columns (0-11).
2459 
2460 .vb
2461            0 1 2 3 4 5 6 7 8 9 10 11
2462           --------------------------
2463    row 3  |. . . d d d o o o o  o  o
2464    row 4  |. . . d d d o o o o  o  o
2465    row 5  |. . . d d d o o o o  o  o
2466           --------------------------
2467 .ve
2468 
2469    Thus, any entries in the d locations are stored in the d (diagonal)
2470    submatrix, and any entries in the o locations are stored in the
2471    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2472    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2473 
2474    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2475    plus the diagonal part of the d matrix,
2476    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2477    In general, for PDE problems in which most nonzeros are near the diagonal,
2478    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2479    or you will get TERRIBLE performance; see the users' manual chapter on
2480    matrices.
2481 
2482    Level: intermediate
2483 
2484 .keywords: matrix, block, aij, compressed row, sparse, parallel
2485 
2486 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2487 @*/
2488 
2489 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2490 {
2491   PetscErrorCode ierr;
2492   PetscMPIInt    size;
2493 
2494   PetscFunctionBegin;
2495   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2496   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2497   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2498   if (size > 1) {
2499     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2500     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2501   } else {
2502     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2503     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2504   }
2505   PetscFunctionReturn(0);
2506 }
2507 
2508 
2509 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2510 {
2511   Mat            mat;
2512   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2513   PetscErrorCode ierr;
2514   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2515   PetscScalar    *array;
2516 
2517   PetscFunctionBegin;
2518   *newmat = 0;
2519 
2520   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2521   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2522   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2523   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2524   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2525   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2526 
2527   mat->factortype   = matin->factortype;
2528   mat->preallocated = PETSC_TRUE;
2529   mat->assembled    = PETSC_TRUE;
2530   mat->insertmode   = NOT_SET_VALUES;
2531 
2532   a      = (Mat_MPISBAIJ*)mat->data;
2533   a->bs2 = oldmat->bs2;
2534   a->mbs = oldmat->mbs;
2535   a->nbs = oldmat->nbs;
2536   a->Mbs = oldmat->Mbs;
2537   a->Nbs = oldmat->Nbs;
2538 
2539 
2540   a->size         = oldmat->size;
2541   a->rank         = oldmat->rank;
2542   a->donotstash   = oldmat->donotstash;
2543   a->roworiented  = oldmat->roworiented;
2544   a->rowindices   = 0;
2545   a->rowvalues    = 0;
2546   a->getrowactive = PETSC_FALSE;
2547   a->barray       = 0;
2548   a->rstartbs     = oldmat->rstartbs;
2549   a->rendbs       = oldmat->rendbs;
2550   a->cstartbs     = oldmat->cstartbs;
2551   a->cendbs       = oldmat->cendbs;
2552 
2553   /* hash table stuff */
2554   a->ht           = 0;
2555   a->hd           = 0;
2556   a->ht_size      = 0;
2557   a->ht_flag      = oldmat->ht_flag;
2558   a->ht_fact      = oldmat->ht_fact;
2559   a->ht_total_ct  = 0;
2560   a->ht_insert_ct = 0;
2561 
2562   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2563   if (oldmat->colmap) {
2564 #if defined(PETSC_USE_CTABLE)
2565     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2566 #else
2567     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2568     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2569     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2570 #endif
2571   } else a->colmap = 0;
2572 
2573   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2574     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2575     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2576     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2577   } else a->garray = 0;
2578 
2579   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2580   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2581   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2582   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2583   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2584 
2585   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2586   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2587   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2588   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2589 
2590   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2591   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2592   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2593   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2594   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2595   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2596   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2597   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2598   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2599   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2600   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2601   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2602   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2603 
2604   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2605   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2606   a->sMvctx = oldmat->sMvctx;
2607   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2608 
2609   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2610   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2611   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2612   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2613   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2614   *newmat = mat;
2615   PetscFunctionReturn(0);
2616 }
2617 
2618 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2619 {
2620   PetscErrorCode ierr;
2621   PetscInt       i,nz,j,rstart,rend;
2622   PetscScalar    *vals,*buf;
2623   MPI_Comm       comm;
2624   MPI_Status     status;
2625   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2626   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2627   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2628   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2629   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2630   PetscInt       dcount,kmax,k,nzcount,tmp;
2631   int            fd;
2632 
2633   PetscFunctionBegin;
2634   /* force binary viewer to load .info file if it has not yet done so */
2635   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2636   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2637   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2638   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2639   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2640   if (bs < 0) bs = 1;
2641 
2642   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2643   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2644   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2645   if (!rank) {
2646     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2647     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2648     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2649   }
2650 
2651   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2652   M    = header[1];
2653   N    = header[2];
2654 
2655   /* If global sizes are set, check if they are consistent with that given in the file */
2656   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2657   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2658 
2659   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2660 
2661   /*
2662      This code adds extra rows to make sure the number of rows is
2663      divisible by the blocksize
2664   */
2665   Mbs        = M/bs;
2666   extra_rows = bs - M + bs*(Mbs);
2667   if (extra_rows == bs) extra_rows = 0;
2668   else                  Mbs++;
2669   if (extra_rows &&!rank) {
2670     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2671   }
2672 
2673   /* determine ownership of all rows */
2674   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2675     mbs = Mbs/size + ((Mbs % size) > rank);
2676     m   = mbs*bs;
2677   } else { /* User Set */
2678     m   = newmat->rmap->n;
2679     mbs = m/bs;
2680   }
2681   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2682   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2683   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2684   rowners[0] = 0;
2685   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2686   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2687   rstart = rowners[rank];
2688   rend   = rowners[rank+1];
2689 
2690   /* distribute row lengths to all processors */
2691   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2692   if (!rank) {
2693     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2694     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2695     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2696     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2697     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2698     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2699     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2700   } else {
2701     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2702   }
2703 
2704   if (!rank) {   /* procs[0] */
2705     /* calculate the number of nonzeros on each processor */
2706     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2707     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2708     for (i=0; i<size; i++) {
2709       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2710         procsnz[i] += rowlengths[j];
2711       }
2712     }
2713     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2714 
2715     /* determine max buffer needed and allocate it */
2716     maxnz = 0;
2717     for (i=0; i<size; i++) {
2718       maxnz = PetscMax(maxnz,procsnz[i]);
2719     }
2720     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2721 
2722     /* read in my part of the matrix column indices  */
2723     nz     = procsnz[0];
2724     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2725     mycols = ibuf;
2726     if (size == 1) nz -= extra_rows;
2727     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2728     if (size == 1) {
2729       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2730     }
2731 
2732     /* read in every ones (except the last) and ship off */
2733     for (i=1; i<size-1; i++) {
2734       nz   = procsnz[i];
2735       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2736       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2737     }
2738     /* read in the stuff for the last proc */
2739     if (size != 1) {
2740       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2741       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2742       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2743       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2744     }
2745     ierr = PetscFree(cols);CHKERRQ(ierr);
2746   } else {  /* procs[i], i>0 */
2747     /* determine buffer space needed for message */
2748     nz = 0;
2749     for (i=0; i<m; i++) nz += locrowlens[i];
2750     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2751     mycols = ibuf;
2752     /* receive message of column indices*/
2753     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2754     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2755     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2756   }
2757 
2758   /* loop over local rows, determining number of off diagonal entries */
2759   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2760   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2761   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2762   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2763   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2764   rowcount = 0;
2765   nzcount  = 0;
2766   for (i=0; i<mbs; i++) {
2767     dcount  = 0;
2768     odcount = 0;
2769     for (j=0; j<bs; j++) {
2770       kmax = locrowlens[rowcount];
2771       for (k=0; k<kmax; k++) {
2772         tmp = mycols[nzcount++]/bs; /* block col. index */
2773         if (!mask[tmp]) {
2774           mask[tmp] = 1;
2775           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2776           else masked1[dcount++] = tmp; /* entry in diag portion */
2777         }
2778       }
2779       rowcount++;
2780     }
2781 
2782     dlens[i]  = dcount;  /* d_nzz[i] */
2783     odlens[i] = odcount; /* o_nzz[i] */
2784 
2785     /* zero out the mask elements we set */
2786     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2787     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2788   }
2789   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2790   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2791   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2792 
2793   if (!rank) {
2794     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2795     /* read in my part of the matrix numerical values  */
2796     nz     = procsnz[0];
2797     vals   = buf;
2798     mycols = ibuf;
2799     if (size == 1) nz -= extra_rows;
2800     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2801     if (size == 1) {
2802       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2803     }
2804 
2805     /* insert into matrix */
2806     jj = rstart*bs;
2807     for (i=0; i<m; i++) {
2808       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2809       mycols += locrowlens[i];
2810       vals   += locrowlens[i];
2811       jj++;
2812     }
2813 
2814     /* read in other processors (except the last one) and ship out */
2815     for (i=1; i<size-1; i++) {
2816       nz   = procsnz[i];
2817       vals = buf;
2818       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2819       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2820     }
2821     /* the last proc */
2822     if (size != 1) {
2823       nz   = procsnz[i] - extra_rows;
2824       vals = buf;
2825       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2826       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2827       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2828     }
2829     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2830 
2831   } else {
2832     /* receive numeric values */
2833     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2834 
2835     /* receive message of values*/
2836     vals   = buf;
2837     mycols = ibuf;
2838     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2839     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2840     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2841 
2842     /* insert into matrix */
2843     jj = rstart*bs;
2844     for (i=0; i<m; i++) {
2845       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2846       mycols += locrowlens[i];
2847       vals   += locrowlens[i];
2848       jj++;
2849     }
2850   }
2851 
2852   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2853   ierr = PetscFree(buf);CHKERRQ(ierr);
2854   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2855   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2856   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2857   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2858   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2859   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2860   PetscFunctionReturn(0);
2861 }
2862 
2863 /*XXXXX@
2864    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2865 
2866    Input Parameters:
2867 .  mat  - the matrix
2868 .  fact - factor
2869 
2870    Not Collective on Mat, each process can have a different hash factor
2871 
2872    Level: advanced
2873 
2874   Notes:
2875    This can also be set by the command line option: -mat_use_hash_table fact
2876 
2877 .keywords: matrix, hashtable, factor, HT
2878 
2879 .seealso: MatSetOption()
2880 @XXXXX*/
2881 
2882 
2883 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2884 {
2885   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2886   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2887   PetscReal      atmp;
2888   PetscReal      *work,*svalues,*rvalues;
2889   PetscErrorCode ierr;
2890   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2891   PetscMPIInt    rank,size;
2892   PetscInt       *rowners_bs,dest,count,source;
2893   PetscScalar    *va;
2894   MatScalar      *ba;
2895   MPI_Status     stat;
2896 
2897   PetscFunctionBegin;
2898   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2899   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2900   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2901 
2902   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2903   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2904 
2905   bs  = A->rmap->bs;
2906   mbs = a->mbs;
2907   Mbs = a->Mbs;
2908   ba  = b->a;
2909   bi  = b->i;
2910   bj  = b->j;
2911 
2912   /* find ownerships */
2913   rowners_bs = A->rmap->range;
2914 
2915   /* each proc creates an array to be distributed */
2916   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2917   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2918 
2919   /* row_max for B */
2920   if (rank != size-1) {
2921     for (i=0; i<mbs; i++) {
2922       ncols = bi[1] - bi[0]; bi++;
2923       brow  = bs*i;
2924       for (j=0; j<ncols; j++) {
2925         bcol = bs*(*bj);
2926         for (kcol=0; kcol<bs; kcol++) {
2927           col  = bcol + kcol;                /* local col index */
2928           col += rowners_bs[rank+1];      /* global col index */
2929           for (krow=0; krow<bs; krow++) {
2930             atmp = PetscAbsScalar(*ba); ba++;
2931             row  = brow + krow;   /* local row index */
2932             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2933             if (work[col] < atmp) work[col] = atmp;
2934           }
2935         }
2936         bj++;
2937       }
2938     }
2939 
2940     /* send values to its owners */
2941     for (dest=rank+1; dest<size; dest++) {
2942       svalues = work + rowners_bs[dest];
2943       count   = rowners_bs[dest+1]-rowners_bs[dest];
2944       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2945     }
2946   }
2947 
2948   /* receive values */
2949   if (rank) {
2950     rvalues = work;
2951     count   = rowners_bs[rank+1]-rowners_bs[rank];
2952     for (source=0; source<rank; source++) {
2953       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2954       /* process values */
2955       for (i=0; i<count; i++) {
2956         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2957       }
2958     }
2959   }
2960 
2961   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2962   ierr = PetscFree(work);CHKERRQ(ierr);
2963   PetscFunctionReturn(0);
2964 }
2965 
2966 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2967 {
2968   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2969   PetscErrorCode    ierr;
2970   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2971   PetscScalar       *x,*ptr,*from;
2972   Vec               bb1;
2973   const PetscScalar *b;
2974 
2975   PetscFunctionBegin;
2976   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2977   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2978 
2979   if (flag == SOR_APPLY_UPPER) {
2980     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2981     PetscFunctionReturn(0);
2982   }
2983 
2984   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2985     if (flag & SOR_ZERO_INITIAL_GUESS) {
2986       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2987       its--;
2988     }
2989 
2990     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2991     while (its--) {
2992 
2993       /* lower triangular part: slvec0b = - B^T*xx */
2994       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2995 
2996       /* copy xx into slvec0a */
2997       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2998       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2999       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3000       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3001 
3002       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
3003 
3004       /* copy bb into slvec1a */
3005       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3006       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3007       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3008       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3009 
3010       /* set slvec1b = 0 */
3011       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3012 
3013       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3014       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3015       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3016       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3017 
3018       /* upper triangular part: bb1 = bb1 - B*x */
3019       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
3020 
3021       /* local diagonal sweep */
3022       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3023     }
3024     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3025   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3026     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3027   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3028     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3029   } else if (flag & SOR_EISENSTAT) {
3030     Vec               xx1;
3031     PetscBool         hasop;
3032     const PetscScalar *diag;
3033     PetscScalar       *sl,scale = (omega - 2.0)/omega;
3034     PetscInt          i,n;
3035 
3036     if (!mat->xx1) {
3037       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
3038       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
3039     }
3040     xx1 = mat->xx1;
3041     bb1 = mat->bb1;
3042 
3043     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
3044 
3045     if (!mat->diag) {
3046       /* this is wrong for same matrix with new nonzero values */
3047       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
3048       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
3049     }
3050     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
3051 
3052     if (hasop) {
3053       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
3054       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3055     } else {
3056       /*
3057           These two lines are replaced by code that may be a bit faster for a good compiler
3058       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
3059       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3060       */
3061       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3062       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3063       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3064       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3065       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
3066       if (omega == 1.0) {
3067         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
3068         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
3069       } else {
3070         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
3071         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
3072       }
3073       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3074       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3075       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3076       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3077     }
3078 
3079     /* multiply off-diagonal portion of matrix */
3080     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3081     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3082     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
3083     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3084     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3085     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
3086     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3087     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3088     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3089     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
3090 
3091     /* local sweep */
3092     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
3093     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
3094   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3095   PetscFunctionReturn(0);
3096 }
3097 
3098 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3099 {
3100   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
3101   PetscErrorCode ierr;
3102   Vec            lvec1,bb1;
3103 
3104   PetscFunctionBegin;
3105   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3106   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3107 
3108   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3109     if (flag & SOR_ZERO_INITIAL_GUESS) {
3110       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3111       its--;
3112     }
3113 
3114     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
3115     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3116     while (its--) {
3117       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3118 
3119       /* lower diagonal part: bb1 = bb - B^T*xx */
3120       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3121       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3122 
3123       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3124       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3125       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3126 
3127       /* upper diagonal part: bb1 = bb1 - B*x */
3128       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3129       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3130 
3131       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3132 
3133       /* diagonal sweep */
3134       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3135     }
3136     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3137     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3138   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3139   PetscFunctionReturn(0);
3140 }
3141 
3142 /*@
3143      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
3144          CSR format the local rows.
3145 
3146    Collective on MPI_Comm
3147 
3148    Input Parameters:
3149 +  comm - MPI communicator
3150 .  bs - the block size, only a block size of 1 is supported
3151 .  m - number of local rows (Cannot be PETSC_DECIDE)
3152 .  n - This value should be the same as the local size used in creating the
3153        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3154        calculated if N is given) For square matrices n is almost always m.
3155 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3156 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3157 .   i - row indices
3158 .   j - column indices
3159 -   a - matrix values
3160 
3161    Output Parameter:
3162 .   mat - the matrix
3163 
3164    Level: intermediate
3165 
3166    Notes:
3167        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3168      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3169      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3170 
3171        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3172 
3173 .keywords: matrix, aij, compressed row, sparse, parallel
3174 
3175 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3176           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3177 @*/
3178 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3179 {
3180   PetscErrorCode ierr;
3181 
3182 
3183   PetscFunctionBegin;
3184   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3185   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3186   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3187   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3188   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3189   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3190   PetscFunctionReturn(0);
3191 }
3192 
3193 
3194 /*@C
3195    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
3196    (the default parallel PETSc format).
3197 
3198    Collective on MPI_Comm
3199 
3200    Input Parameters:
3201 +  B - the matrix
3202 .  bs - the block size
3203 .  i - the indices into j for the start of each local row (starts with zero)
3204 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3205 -  v - optional values in the matrix
3206 
3207    Level: developer
3208 
3209 .keywords: matrix, aij, compressed row, sparse, parallel
3210 
3211 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3212 @*/
3213 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3214 {
3215   PetscErrorCode ierr;
3216 
3217   PetscFunctionBegin;
3218   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3219   PetscFunctionReturn(0);
3220 }
3221 
3222 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3223 {
3224   PetscErrorCode ierr;
3225   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3226   PetscInt       *indx;
3227   PetscScalar    *values;
3228 
3229   PetscFunctionBegin;
3230   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3231   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3232     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3233     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3234     PetscInt       *bindx,rmax=a->rmax,j;
3235 
3236     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3237     mbs = m/bs; Nbs = N/cbs;
3238     if (n == PETSC_DECIDE) {
3239       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3240     }
3241     /* Check sum(n) = Nbs */
3242     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3243     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
3244 
3245     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3246     rstart -= mbs;
3247 
3248     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3249     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3250     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3251     for (i=0; i<mbs; i++) {
3252       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3253       nnz = nnz/bs;
3254       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3255       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3256       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3257     }
3258     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3259     ierr = PetscFree(bindx);CHKERRQ(ierr);
3260 
3261     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3262     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3263     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3264     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3265     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3266     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3267   }
3268 
3269   /* numeric phase */
3270   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3271   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3272 
3273   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3274   for (i=0; i<m; i++) {
3275     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3276     Ii   = i + rstart;
3277     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3278     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3279   }
3280   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3281   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3282   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3283   PetscFunctionReturn(0);
3284 }
3285