/* xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 609bdbee21ea3be08735c64dbe00a9ab27759925) */

#include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
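
/*
   Usage sketch for the store/retrieve pair above (a minimal example, assuming the
   matrix is already preallocated; the option and routine names are the public
   PETSc API, everything else is illustrative). MatStoreValues() requires that
   the nonzero pattern be frozen first:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);      // snapshot values of both A and B parts
     // ... overwrite entries, e.g. while building a shifted matrix ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   // restore the snapshot
*/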

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }
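
/*
   Both macros above locate a block column with the same two-phase search: a short
   bisection narrows [low,high) over the sorted block-column indices of the row,
   then a linear scan finds the exact slot (or the insertion point). A standalone
   sketch of that pattern, with illustrative names:

     static PetscInt FindBlockCol(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
     {
       PetscInt low = 0,high = nrow,t,i;
       while (high-low > 3) {           // bisection over sorted indices
         t = (low+high)/2;
         if (rp[t] > bcol) high = t;
         else              low  = t;
       }
       for (i=low; i<high; i++) {       // short linear scan finishes the job
         if (rp[i] == bcol) return i;   // found existing block
         if (rp[i] > bcol) break;       // i is the insertion point
       }
       return -(i+1);                   // encode "not found, insert at i"
     }
*/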

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored.
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macros */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
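
/*
   A minimal usage sketch for MatSetValues() on a MATMPISBAIJ matrix (the option
   and routine names are the public PETSc API; the loop bounds and values are
   illustrative). Only entries with i <= j need be set; lower triangular entries
   can be silently dropped with the option shown:

     PetscInt rstart,rend;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       PetscScalar diag = 4.0,offdiag = -1.0;
       ierr = MatSetValues(mat,1,&i,1,&i,&diag,INSERT_VALUES);CHKERRQ(ierr);
       if (i+1 < N) { PetscInt col = i+1; ierr = MatSetValues(mat,1,&i,1,&col,&offdiag,INSERT_VALUES);CHKERRQ(ierr); }
     }
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/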

PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp   = aj + ai[row];
  ap   = aa + bs2*ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap +  bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++  = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at block indices (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
   This routine is exactly duplicated in mpibaij.c
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];
  ap   = aa + bs2*ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  low  = 0;
  high = nrow;
  value = v;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap +  bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block indices (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     =baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If n == 1 (row oriented) or m == 1 (column oriented), a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
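
/*
   A usage sketch for MatSetValuesBlocked() with a MATMPISBAIJ matrix (block size
   bs = 2 here; all names and values are illustrative). Indices are in block
   units, each bs*bs block of values is supplied densely, and the block row must
   not exceed the block column:

     PetscInt    ib = brow,jb = bcol;            // block row/column, ib <= jb
     PetscScalar vals[4] = {4.0,-1.0,-1.0,4.0};  // one 2x2 block, row oriented
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,vals,INSERT_VALUES);CHKERRQ(ierr);

   With MatSetOption(mat,MAT_ROW_ORIENTED,PETSC_FALSE) the same array is
   interpreted in column-major order, matching the roworiented branches above.
*/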

PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
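
/*
   MatGetValues() on this type only returns locally owned rows (see the error at
   the end of the routine above). An illustrative fetch of one entry, assuming
   row lies in this rank's ownership range (names are illustrative):

     PetscScalar val;
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/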

PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
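      /* B holds the blocks to the right of the diagonal block; by symmetry each of its
         entries also appears below the diagonal in the global matrix, so its squared
         norm is counted twice */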
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash &&  !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the roworiented flags, and restore the original flags
       after MatSetValuesBlocked_MPISBAIJ() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, so that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
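
/*
   The standard assembly pattern that drives the two routines above (names are
   the public PETSc API; this is an illustrative sketch, not code taken from
   this file):

     // all ranks: set local and (optionally) off-process entries
     ierr = MatSetValues(mat,...);CHKERRQ(ierr);
     // communicate stashed off-process entries and finalize the parallel data
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     // mat is now usable by MatMult(), MatNorm(), viewers, etc.
*/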

extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row  */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include them for all rows, not just once per
     block row, since process 0 does not have the information needed to expand a block
     row into its rows. This requires more communication, but still no more than the
     communication needed for the nonzero values  */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
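
/*
   A sketch of saving and reloading through the binary path above (standard
   PETSc viewer API; the file name is illustrative). The info file written at
   the end of MatView_MPISBAIJ_Binary() records -matload_block_size so that
   MatLoad() can recover the block size:

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"sbaij.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"sbaij.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&mat2);CHKERRQ(ierr);
     ierr = MatSetType(mat2,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatLoad(mat2,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/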

PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
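
/*
   Sketch of the algebra behind the symmetric multiply above (notation is
   illustrative): only the upper triangular blocks are stored, so on each
   process

       y = A*x_loc + B*x_ghost + (contributions of B^T*x_loc scattered
                                  back to the owning processes)

   where A is the local diagonal block and B the local off-diagonal block.
   slvec0 packs [x_loc ; B^T*x_loc], and one forward scatter with ADD_VALUES
   (sMvctx) simultaneously delivers the ghost values into slvec1b and adds the
   transpose contributions into slvec1a, avoiding a second communication round
   (compare MatMult_MPISBAIJ_2comm() below, which uses two scatters).
*/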

PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1296 
1297 /*
1298   This only works correctly for square matrices where the subblock A->A is the
1299    diagonal block
1300 */
1301 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1302 {
1303   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1304   PetscErrorCode ierr;
1305 
1306   PetscFunctionBegin;
1307   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1308   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1313 {
1314   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1315   PetscErrorCode ierr;
1316 
1317   PetscFunctionBegin;
1318   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1319   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1324 {
1325   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1326   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1327   PetscErrorCode ierr;
1328   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1329   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1330   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1331 
1332   PetscFunctionBegin;
1333   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1334   mat->getrowactive = PETSC_TRUE;
1335 
1336   if (!mat->rowvalues && (idx || v)) {
1337     /*
1338         allocate enough space to hold information from the longest row.
1339     */
1340     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1341     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1342     PetscInt     max = 1,mbs = mat->mbs,tmp;
1343     for (i=0; i<mbs; i++) {
1344       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1345       if (max < tmp) max = tmp;
1346     }
1347     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1348   }
1349 
1350   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1351   lrow = row - brstart;  /* local row index */
1352 
1353   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1354   if (!v)   {pvA = 0; pvB = 0;}
1355   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1356   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1357   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1358   nztot = nzA + nzB;
1359 
1360   cmap = mat->garray;
1361   if (v  || idx) {
1362     if (nztot) {
1363       /* Sort by increasing column numbers, assuming A and B already sorted */
1364       PetscInt imark = -1;
1365       if (v) {
1366         *v = v_p = mat->rowvalues;
1367         for (i=0; i<nzB; i++) {
1368           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1369           else break;
1370         }
1371         imark = i;
1372         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1373         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1374       }
1375       if (idx) {
1376         *idx = idx_p = mat->rowindices;
1377         if (imark > -1) {
1378           for (i=0; i<imark; i++) {
1379             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1380           }
1381         } else {
1382           for (i=0; i<nzB; i++) {
1383             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1384             else break;
1385           }
1386           imark = i;
1387         }
1388         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1389         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1390       }
1391     } else {
1392       if (idx) *idx = 0;
1393       if (v)   *v   = 0;
1394     }
1395   }
1396   *nz  = nztot;
1397   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1398   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1403 {
1404   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1405 
1406   PetscFunctionBegin;
1407   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1408   baij->getrowactive = PETSC_FALSE;
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1413 {
1414   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1415   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1416 
1417   PetscFunctionBegin;
1418   aA->getrow_utriangular = PETSC_TRUE;
1419   PetscFunctionReturn(0);
1420 }
1421 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1422 {
1423   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1424   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1425 
1426   PetscFunctionBegin;
1427   aA->getrow_utriangular = PETSC_FALSE;
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1432 {
1433   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1434   PetscErrorCode ierr;
1435 
1436   PetscFunctionBegin;
1437   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1438   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1443 {
1444   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1445   PetscErrorCode ierr;
1446 
1447   PetscFunctionBegin;
1448   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1449   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1450   PetscFunctionReturn(0);
1451 }
1452 
1453 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1454    Input: isrow       - distributed (parallel),
1455           iscol_local - locally owned (seq)
1456 */
1457 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1458 {
1459   PetscErrorCode ierr;
1460   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1461   const PetscInt *ptr1,*ptr2;
1462 
1463   PetscFunctionBegin;
1464   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1465   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1466   if (sz1 > sz2) {
1467     *flg = PETSC_FALSE;
1468     PetscFunctionReturn(0);
1469   }
1470 
1471   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1472   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1473 
1474   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1475   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1476   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1477   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1478   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1479   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1480 
1481   nmatch=0;
1482   k     = 0;
1483   for (i=0; i<sz1; i++){
1484     for (j=k; j<sz2; j++){
1485       if (a1[i] == a2[j]) {
1486         k = j; nmatch++;
1487         break;
1488       }
1489     }
1490   }
1491   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1492   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1493   ierr = PetscFree(a1);CHKERRQ(ierr);
1494   ierr = PetscFree(a2);CHKERRQ(ierr);
1495   if (nmatch < sz1) {
1496     *flg = PETSC_FALSE;
1497   } else {
1498     *flg = PETSC_TRUE;
1499   }
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1504 {
1505   PetscErrorCode ierr;
1506   IS             iscol_local;
1507   PetscInt       csize;
1508   PetscBool      isequal;
1509 
1510   PetscFunctionBegin;
1511   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1512   if (call == MAT_REUSE_MATRIX) {
1513     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1514     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1515   } else {
1516     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1517     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1518     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1519   }
1520 
1521   /* now call MatCreateSubMatrix_MPIBAIJ_Private() */
1522   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1523   if (call == MAT_INITIAL_MATRIX) {
1524     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1525     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1526   }
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1531 {
1532   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1533   PetscErrorCode ierr;
1534 
1535   PetscFunctionBegin;
1536   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1537   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1542 {
1543   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1544   Mat            A  = a->A,B = a->B;
1545   PetscErrorCode ierr;
1546   PetscReal      isend[5],irecv[5];
1547 
1548   PetscFunctionBegin;
1549   info->block_size = (PetscReal)matin->rmap->bs;
1550 
1551   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1552 
1553   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1554   isend[3] = info->memory;  isend[4] = info->mallocs;
1555 
1556   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1557 
1558   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1559   isend[3] += info->memory;  isend[4] += info->mallocs;
1560   if (flag == MAT_LOCAL) {
1561     info->nz_used      = isend[0];
1562     info->nz_allocated = isend[1];
1563     info->nz_unneeded  = isend[2];
1564     info->memory       = isend[3];
1565     info->mallocs      = isend[4];
1566   } else if (flag == MAT_GLOBAL_MAX) {
1567     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1568 
1569     info->nz_used      = irecv[0];
1570     info->nz_allocated = irecv[1];
1571     info->nz_unneeded  = irecv[2];
1572     info->memory       = irecv[3];
1573     info->mallocs      = irecv[4];
1574   } else if (flag == MAT_GLOBAL_SUM) {
1575     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1576 
1577     info->nz_used      = irecv[0];
1578     info->nz_allocated = irecv[1];
1579     info->nz_unneeded  = irecv[2];
1580     info->memory       = irecv[3];
1581     info->mallocs      = irecv[4];
1582   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1583   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1584   info->fill_ratio_needed = 0;
1585   info->factor_mallocs    = 0;
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1590 {
1591   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1592   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1593   PetscErrorCode ierr;
1594 
1595   PetscFunctionBegin;
1596   switch (op) {
1597   case MAT_NEW_NONZERO_LOCATIONS:
1598   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1599   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1600   case MAT_KEEP_NONZERO_PATTERN:
1601   case MAT_SUBMAT_SINGLEIS:
1602   case MAT_NEW_NONZERO_LOCATION_ERR:
1603     MatCheckPreallocated(A,1);
1604     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1605     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1606     break;
1607   case MAT_ROW_ORIENTED:
1608     MatCheckPreallocated(A,1);
1609     a->roworiented = flg;
1610 
1611     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1612     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1613     break;
1614   case MAT_NEW_DIAGONALS:
1615     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1616     break;
1617   case MAT_IGNORE_OFF_PROC_ENTRIES:
1618     a->donotstash = flg;
1619     break;
1620   case MAT_USE_HASH_TABLE:
1621     a->ht_flag = flg;
1622     break;
1623   case MAT_HERMITIAN:
1624     MatCheckPreallocated(A,1);
1625     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1626     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1627 
1628     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1629     break;
1630   case MAT_SPD:
1631     A->spd_set = PETSC_TRUE;
1632     A->spd     = flg;
1633     if (flg) {
1634       A->symmetric                  = PETSC_TRUE;
1635       A->structurally_symmetric     = PETSC_TRUE;
1636       A->symmetric_set              = PETSC_TRUE;
1637       A->structurally_symmetric_set = PETSC_TRUE;
1638     }
1639     break;
1640   case MAT_SYMMETRIC:
1641     MatCheckPreallocated(A,1);
1642     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1643     break;
1644   case MAT_STRUCTURALLY_SYMMETRIC:
1645     MatCheckPreallocated(A,1);
1646     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1647     break;
1648   case MAT_SYMMETRY_ETERNAL:
1649     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1650     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1651     break;
1652   case MAT_IGNORE_LOWER_TRIANGULAR:
1653     aA->ignore_ltriangular = flg;
1654     break;
1655   case MAT_ERROR_LOWER_TRIANGULAR:
1656     aA->ignore_ltriangular = flg;
1657     break;
1658   case MAT_GETROW_UPPERTRIANGULAR:
1659     aA->getrow_utriangular = flg;
1660     break;
1661   default:
1662     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unknown option %d",op);
1663   }
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1668 {
1669   PetscErrorCode ierr;
1670 
1671   PetscFunctionBegin;
1672   if (reuse == MAT_INITIAL_MATRIX) {
1673     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1674   }  else if (reuse == MAT_REUSE_MATRIX) {
1675     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1676   }
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1681 {
1682   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1683   Mat            a     = baij->A, b=baij->B;
1684   PetscErrorCode ierr;
1685   PetscInt       nv,m,n;
1686   PetscBool      flg;
1687 
1688   PetscFunctionBegin;
1689   if (ll != rr) {
1690     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1691     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1692   }
1693   if (!ll) PetscFunctionReturn(0);
1694 
1695   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1696   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1697 
1698   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1699   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1700 
1701   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1702 
1703   /* left diagonalscale the off-diagonal part */
1704   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1705 
1706   /* scale the diagonal part */
1707   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1708 
1709   /* right diagonalscale the off-diagonal part */
1710   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1711   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1716 {
1717   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1718   PetscErrorCode ierr;
1719 
1720   PetscFunctionBegin;
1721   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1726 
1727 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1728 {
1729   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1730   Mat            a,b,c,d;
1731   PetscBool      flg;
1732   PetscErrorCode ierr;
1733 
1734   PetscFunctionBegin;
1735   a = matA->A; b = matA->B;
1736   c = matB->A; d = matB->B;
1737 
1738   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1739   if (flg) {
1740     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1741   }
1742   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1747 {
1748   PetscErrorCode ierr;
1749   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1750   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1751 
1752   PetscFunctionBegin;
1753   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1754   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1755     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1756     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1757     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1758   } else {
1759     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1760     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1761   }
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1766 {
1767   PetscErrorCode ierr;
1768 
1769   PetscFunctionBegin;
1770   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1771   PetscFunctionReturn(0);
1772 }
1773 
1774 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1775 {
1776   PetscErrorCode ierr;
1777   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1778   PetscBLASInt   bnz,one=1;
1779   Mat_SeqSBAIJ   *xa,*ya;
1780   Mat_SeqBAIJ    *xb,*yb;
1781 
1782   PetscFunctionBegin;
1783   if (str == SAME_NONZERO_PATTERN) {
1784     PetscScalar alpha = a;
1785     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1786     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1787     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1788     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1789     xb   = (Mat_SeqBAIJ*)xx->B->data;
1790     yb   = (Mat_SeqBAIJ*)yy->B->data;
1791     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1792     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1793     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1794   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
1795     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1796     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1797     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1798   } else {
1799     Mat      B;
1800     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1801     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1802     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1803     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1804     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1805     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1806     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1807     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1808     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1809     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1810     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1811     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1812     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1813     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1814     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1815     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1816     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1817     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1818     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1819     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1820   }
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1825 {
1826   PetscErrorCode ierr;
1827   PetscInt       i;
1828   PetscBool      flg;
1829 
1830   PetscFunctionBegin;
1831   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1832   for (i=0; i<n; i++) {
1833     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1834     if (!flg) {
1835       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1836     }
1837   }
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1842 {
1843   PetscErrorCode ierr;
1844   Mat_MPISBAIJ   *maij = (Mat_MPISBAIJ*)Y->data;
1845   Mat_SeqSBAIJ   *aij  = (Mat_SeqSBAIJ*)maij->A->data;
1846 
1847   PetscFunctionBegin;
1848   if (!Y->preallocated) {
1849     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1850   } else if (!aij->nz) {
1851     PetscInt nonew = aij->nonew;
1852     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1853     aij->nonew = nonew;
1854   }
1855   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1860 {
1861   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1866   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1867   if (d) {
1868     PetscInt rstart;
1869     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1870     *d += rstart/A->rmap->bs;
1871 
1872   }
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1877 {
1878   PetscFunctionBegin;
1879   *a = ((Mat_MPISBAIJ*)A->data)->A;
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 /* -------------------------------------------------------------------*/
1884 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1885                                        MatGetRow_MPISBAIJ,
1886                                        MatRestoreRow_MPISBAIJ,
1887                                        MatMult_MPISBAIJ,
1888                                /*  4*/ MatMultAdd_MPISBAIJ,
1889                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1890                                        MatMultAdd_MPISBAIJ,
1891                                        0,
1892                                        0,
1893                                        0,
1894                                /* 10*/ 0,
1895                                        0,
1896                                        0,
1897                                        MatSOR_MPISBAIJ,
1898                                        MatTranspose_MPISBAIJ,
1899                                /* 15*/ MatGetInfo_MPISBAIJ,
1900                                        MatEqual_MPISBAIJ,
1901                                        MatGetDiagonal_MPISBAIJ,
1902                                        MatDiagonalScale_MPISBAIJ,
1903                                        MatNorm_MPISBAIJ,
1904                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1905                                        MatAssemblyEnd_MPISBAIJ,
1906                                        MatSetOption_MPISBAIJ,
1907                                        MatZeroEntries_MPISBAIJ,
1908                                /* 24*/ 0,
1909                                        0,
1910                                        0,
1911                                        0,
1912                                        0,
1913                                /* 29*/ MatSetUp_MPISBAIJ,
1914                                        0,
1915                                        0,
1916                                        MatGetDiagonalBlock_MPISBAIJ,
1917                                        0,
1918                                /* 34*/ MatDuplicate_MPISBAIJ,
1919                                        0,
1920                                        0,
1921                                        0,
1922                                        0,
1923                                /* 39*/ MatAXPY_MPISBAIJ,
1924                                        MatCreateSubMatrices_MPISBAIJ,
1925                                        MatIncreaseOverlap_MPISBAIJ,
1926                                        MatGetValues_MPISBAIJ,
1927                                        MatCopy_MPISBAIJ,
1928                                /* 44*/ 0,
1929                                        MatScale_MPISBAIJ,
1930                                        MatShift_MPISBAIJ,
1931                                        0,
1932                                        0,
1933                                /* 49*/ 0,
1934                                        0,
1935                                        0,
1936                                        0,
1937                                        0,
1938                                /* 54*/ 0,
1939                                        0,
1940                                        MatSetUnfactored_MPISBAIJ,
1941                                        0,
1942                                        MatSetValuesBlocked_MPISBAIJ,
1943                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1944                                        0,
1945                                        0,
1946                                        0,
1947                                        0,
1948                                /* 64*/ 0,
1949                                        0,
1950                                        0,
1951                                        0,
1952                                        0,
1953                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1954                                        0,
1955                                        0,
1956                                        0,
1957                                        0,
1958                                /* 74*/ 0,
1959                                        0,
1960                                        0,
1961                                        0,
1962                                        0,
1963                                /* 79*/ 0,
1964                                        0,
1965                                        0,
1966                                        0,
1967                                        MatLoad_MPISBAIJ,
1968                                /* 84*/ 0,
1969                                        0,
1970                                        0,
1971                                        0,
1972                                        0,
1973                                /* 89*/ 0,
1974                                        0,
1975                                        0,
1976                                        0,
1977                                        0,
1978                                /* 94*/ 0,
1979                                        0,
1980                                        0,
1981                                        0,
1982                                        0,
1983                                /* 99*/ 0,
1984                                        0,
1985                                        0,
1986                                        0,
1987                                        0,
1988                                /*104*/ 0,
1989                                        MatRealPart_MPISBAIJ,
1990                                        MatImaginaryPart_MPISBAIJ,
1991                                        MatGetRowUpperTriangular_MPISBAIJ,
1992                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1993                                /*109*/ 0,
1994                                        0,
1995                                        0,
1996                                        0,
1997                                        MatMissingDiagonal_MPISBAIJ,
1998                                /*114*/ 0,
1999                                        0,
2000                                        0,
2001                                        0,
2002                                        0,
2003                                /*119*/ 0,
2004                                        0,
2005                                        0,
2006                                        0,
2007                                        0,
2008                                /*124*/ 0,
2009                                        0,
2010                                        0,
2011                                        0,
2012                                        0,
2013                                /*129*/ 0,
2014                                        0,
2015                                        0,
2016                                        0,
2017                                        0,
2018                                /*134*/ 0,
2019                                        0,
2020                                        0,
2021                                        0,
2022                                        0,
2023                                /*139*/ MatSetBlockSizes_Default,
2024                                        0,
2025                                        0,
2026                                        0,
2027                                        0,
2028                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2029 };
2030 
2031 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2032 {
2033   Mat_MPISBAIJ   *b;
2034   PetscErrorCode ierr;
2035   PetscInt       i,mbs,Mbs;
2036 
2037   PetscFunctionBegin;
2038   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2039   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2040   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2041   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2042 
2043   b   = (Mat_MPISBAIJ*)B->data;
2044   mbs = B->rmap->n/bs;
2045   Mbs = B->rmap->N/bs;
2046   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
2047 
2048   B->rmap->bs = bs;
2049   b->bs2      = bs*bs;
2050   b->mbs      = mbs;
2051   b->Mbs      = Mbs;
2052   b->nbs      = B->cmap->n/bs;
2053   b->Nbs      = B->cmap->N/bs;
2054 
2055   for (i=0; i<=b->size; i++) {
2056     b->rangebs[i] = B->rmap->range[i]/bs;
2057   }
2058   b->rstartbs = B->rmap->rstart/bs;
2059   b->rendbs   = B->rmap->rend/bs;
2060 
2061   b->cstartbs = B->cmap->rstart/bs;
2062   b->cendbs   = B->cmap->rend/bs;
2063 
2064 #if defined(PETSC_USE_CTABLE)
2065   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2066 #else
2067   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2068 #endif
2069   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2070   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2071   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2072   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2073   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2074   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2075   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2076   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2077   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2078 
2079   /* Because b->B may have been resized we simply destroy it and create a new one each time */
2080   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2081   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2082   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2083   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2084   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2085 
2086   if (!B->preallocated) {
2087     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2088     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2089     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2090     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2091     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2092   }
2093 
2094   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2095   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2096 
2097   B->preallocated  = PETSC_TRUE;
2098   B->was_assembled = PETSC_FALSE;
2099   B->assembled     = PETSC_FALSE;
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2104 {
2105   PetscInt       m,rstart,cstart,cend;
2106   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2107   const PetscInt *JJ    =0;
2108   PetscScalar    *values=0;
2109   PetscErrorCode ierr;
2110 
2111   PetscFunctionBegin;
2112   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2113   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2114   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2115   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2116   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2117   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2118   m      = B->rmap->n/bs;
2119   rstart = B->rmap->rstart/bs;
2120   cstart = B->cmap->rstart/bs;
2121   cend   = B->cmap->rend/bs;
2122 
2123   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2124   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
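  /* For each local block row, count the block columns falling inside the diagonal ownership
     range [cstart,cend) versus outside it; the scan below assumes jj is sorted within each row */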
2125   for (i=0; i<m; i++) {
2126     nz = ii[i+1] - ii[i];
2127     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2128     nz_max = PetscMax(nz_max,nz);
2129     JJ     = jj + ii[i];
2130     for (j=0; j<nz; j++) {
2131       if (*JJ >= cstart) break;
2132       JJ++;
2133     }
2134     d = 0;
2135     for (; j<nz; j++) {
2136       if (*JJ++ >= cend) break;
2137       d++;
2138     }
2139     d_nnz[i] = d;
2140     o_nnz[i] = nz - d;
2141   }
2142   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2143   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2144 
2145   values = (PetscScalar*)V;
2146   if (!values) {
2147     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2148     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2149   }
2150   for (i=0; i<m; i++) {
2151     PetscInt          row    = i + rstart;
2152     PetscInt          ncols  = ii[i+1] - ii[i];
2153     const PetscInt    *icols = jj + ii[i];
2154     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2155     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2156   }
2157 
2158   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2159   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2160   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2161   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 /*MC
2166    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2167    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2168    the matrix is stored.
2169 
2170   For complex numbers this matrix is by default symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric
2171   call MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE) after the matrix has been assembled.
2172 
2173    Options Database Keys:
2174 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2175 
2176   Level: beginner
2177 
2178 .seealso: MatCreateMPISBAIJ
2179 M*/
2180 
2181 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2182 
2183 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2184 {
2185   Mat_MPISBAIJ   *b;
2186   PetscErrorCode ierr;
2187   PetscBool      flg = PETSC_FALSE;
2188 
2189   PetscFunctionBegin;
2190   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2191   B->data = (void*)b;
2192   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2193 
2194   B->ops->destroy = MatDestroy_MPISBAIJ;
2195   B->ops->view    = MatView_MPISBAIJ;
2196   B->assembled    = PETSC_FALSE;
2197   B->insertmode   = NOT_SET_VALUES;
2198 
2199   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2200   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2201 
2202   /* build local table of row and column ownerships */
2203   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2204 
2205   /* build cache for off-processor entries formed during assembly */
2206   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2207 
2208   b->donotstash  = PETSC_FALSE;
2209   b->colmap      = NULL;
2210   b->garray      = NULL;
2211   b->roworiented = PETSC_TRUE;
2212 
2213   /* stuff used in block assembly */
2214   b->barray = 0;
2215 
2216   /* stuff used for matrix vector multiply */
2217   b->lvec    = 0;
2218   b->Mvctx   = 0;
2219   b->slvec0  = 0;
2220   b->slvec0b = 0;
2221   b->slvec1  = 0;
2222   b->slvec1a = 0;
2223   b->slvec1b = 0;
2224   b->sMvctx  = 0;
2225 
2226   /* stuff for MatGetRow() */
2227   b->rowindices   = 0;
2228   b->rowvalues    = 0;
2229   b->getrowactive = PETSC_FALSE;
2230 
2231   /* hash table stuff */
2232   b->ht           = 0;
2233   b->hd           = 0;
2234   b->ht_size      = 0;
2235   b->ht_flag      = PETSC_FALSE;
2236   b->ht_fact      = 0;
2237   b->ht_total_ct  = 0;
2238   b->ht_insert_ct = 0;
2239 
2240   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2241   b->ijonly = PETSC_FALSE;
2242 
2243   b->in_loc = 0;
2244   b->v_loc  = 0;
2245   b->n_loc  = 0;
2246 
2247   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2248   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2249   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2250   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2251 #if defined(PETSC_HAVE_ELEMENTAL)
2252   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2253 #endif
2254 
2255   B->symmetric                  = PETSC_TRUE;
2256   B->structurally_symmetric     = PETSC_TRUE;
2257   B->symmetric_set              = PETSC_TRUE;
2258   B->structurally_symmetric_set = PETSC_TRUE;
2259 
2260   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2261   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2262   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2263   if (flg) {
2264     PetscReal fact = 1.39;
2265     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2266     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2267     if (fact <= 1.0) fact = 1.39;
2268     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2269     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2270   }
2271   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2272   PetscFunctionReturn(0);
2273 }
2274 
2275 /*MC
2276    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2277 
2278    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2279    and MATMPISBAIJ otherwise.
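
   For example (a minimal sketch; A is assumed to have been created with MatCreate() and given its sizes):
.vb
   ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr); /* resolves to MATSEQSBAIJ or MATMPISBAIJ based on the communicator size */
.ve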
2280 
2281    Options Database Keys:
2282 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2283 
2284   Level: beginner
2285 
2286 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2287 M*/
2288 
2289 /*@C
2290    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2291    the user should preallocate the matrix storage by setting the parameters
2292    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2293    performance can be increased by more than a factor of 50.
2294 
2295    Collective on Mat
2296 
2297    Input Parameters:
2298 +  B - the matrix
2299 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2300           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2301 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2302            submatrix  (same for all local rows)
2303 .  d_nnz - array containing the number of block nonzeros in the various block rows
2304            in the upper triangular and diagonal part of the diagonal portion of the local
2305            submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2306            for the diagonal entry and set a value even if it is zero.
2307 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2308            submatrix (same for all local rows).
2309 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2310            off-diagonal portion of the local submatrix that is right of the diagonal
2311            (possibly different for each block row) or NULL.
2312 
2313 
2314    Options Database Keys:
2315 +   -mat_no_unroll - uses code that does not unroll the loops in the
2316                      block calculations (much slower)
2317 -   -mat_block_size - size of the blocks to use
2318 
2319    Notes:
2320 
2321    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2322    then it must be used on all processors that share the object for that argument.
2323 
2324    If the *_nnz parameter is given then the *_nz parameter is ignored
2325 
2326    Storage Information:
2327    For a square global matrix we define each processor's diagonal portion
2328    to be its local rows and the corresponding columns (a square submatrix);
2329    each processor's off-diagonal portion encompasses the remainder of the
2330    local matrix (a rectangular submatrix).
2331 
2332    The user can specify preallocated storage for the diagonal part of
2333    the local submatrix with either d_nz or d_nnz (not both).  Set
2334    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2335    memory allocation.  Likewise, specify preallocated storage for the
2336    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2337 
2338    You can call MatGetInfo() to get information on how effective the preallocation was;
2339    for example, examine the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2340    You can also run with the option -info and look for messages with the string
2341    malloc in them to see if additional memory allocation was needed.
2342 
2343    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2344    the figure below we depict these three local rows and all columns (0-11).
2345 
2346 .vb
2347            0 1 2 3 4 5 6 7 8 9 10 11
2348           --------------------------
2349    row 3  |. . . d d d o o o o  o  o
2350    row 4  |. . . d d d o o o o  o  o
2351    row 5  |. . . d d d o o o o  o  o
2352           --------------------------
2353 .ve
2354 
2355    Thus, any entries in the d locations are stored in the d (diagonal)
2356    submatrix, and any entries in the o locations are stored in the
2357    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2358    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2359 
2360    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2361    plus the diagonal part of the d matrix,
2362    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2363 
2364    In general, for PDE problems in which most nonzeros are near the diagonal,
2365    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2366    or you will get TERRIBLE performance; see the users' manual chapter on
2367    matrices.
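
   As an illustration for the three rows depicted above (a sketch assuming bs=1, so block rows
   coincide with rows, and a dense pattern within the depicted d and o columns; the counts are
   hypothetical):
.vb
   PetscInt d_nnz[] = {3,2,1};  /* upper-triangular d part: row 3 sees columns 3-5, row 4 sees 4-5, row 5 sees 5 */
   PetscInt o_nnz[] = {6,6,6};  /* every local row sees the six o columns 6-11 */
   ierr = MatMPISBAIJSetPreallocation(B,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve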
2368 
2369    Level: intermediate
2370 
2371 .keywords: matrix, block, aij, compressed row, sparse, parallel
2372 
2373 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2374 @*/
2375 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2376 {
2377   PetscErrorCode ierr;
2378 
2379   PetscFunctionBegin;
2380   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2381   PetscValidType(B,1);
2382   PetscValidLogicalCollectiveInt(B,bs,2);
2383   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 /*@C
2388    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2389    (block compressed row).  For good matrix assembly performance
2390    the user should preallocate the matrix storage by setting the parameters
2391    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2392    performance can be increased by more than a factor of 50.
2393 
2394    Collective on MPI_Comm
2395 
2396    Input Parameters:
2397 +  comm - MPI communicator
2398 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2399           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2400 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2401            This value should be the same as the local size used in creating the
2402            y vector for the matrix-vector product y = Ax.
2403 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2404            This value should be the same as the local size used in creating the
2405            x vector for the matrix-vector product y = Ax.
2406 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2407 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2408 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2409            submatrix  (same for all local rows)
2410 .  d_nnz - array containing the number of block nonzeros in the various block rows
2411            in the upper triangular portion of the diagonal portion of the local
2412            submatrix (possibly different for each block row) or NULL.
2413            If you plan to factor the matrix you must leave room for the diagonal entry and
2414            set its value even if it is zero.
2415 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2416            submatrix (same for all local rows).
2417 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2418            off-diagonal portion of the local submatrix (possibly different for
2419            each block row) or NULL.
2420 
2421    Output Parameter:
2422 .  A - the matrix
2423 
2424    Options Database Keys:
2425 +   -mat_no_unroll - uses code that does not unroll the loops in the
2426                      block calculations (much slower)
2427 .   -mat_block_size - size of the blocks to use
2428 -   -mat_mpi - use the parallel matrix data structures even on one processor
2429                (defaults to using SeqBAIJ format on one processor)
2430 
2431    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2432    MatXXXXSetPreallocation() paradigm instead of this routine directly, as sketched below.
2433    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
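
   A minimal sketch of that paradigm (comm, bs, the sizes m, n, M, N and the counts d_nz, o_nz
   are assumed to be defined by the caller; each preallocation call is a no-op unless the matrix
   ends up with the matching type):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
   ierr = MatSeqSBAIJSetPreallocation(A,bs,d_nz,NULL);CHKERRQ(ierr);
.ve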
2434 
2435    Notes:
2436    The number of rows and columns must be divisible by blocksize.
2437    This matrix type does not support complex Hermitian operations.
2438 
2439    The user MUST specify either the local or global matrix dimensions
2440    (possibly both).
2441 
2442    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2443    then it must be used on all processors that share the object for that argument.
2444 
2445    If the *_nnz parameter is given then the *_nz parameter is ignored
2446 
2447    Storage Information:
2448    For a square global matrix we define each processor's diagonal portion
2449    to be its local rows and the corresponding columns (a square submatrix);
2450    each processor's off-diagonal portion encompasses the remainder of the
2451    local matrix (a rectangular submatrix).
2452 
2453    The user can specify preallocated storage for the diagonal part of
2454    the local submatrix with either d_nz or d_nnz (not both).  Set
2455    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2456    memory allocation.  Likewise, specify preallocated storage for the
2457    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2458 
2459    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2460    the figure below we depict these three local rows and all columns (0-11).
2461 
2462 .vb
2463            0 1 2 3 4 5 6 7 8 9 10 11
2464           --------------------------
2465    row 3  |. . . d d d o o o o  o  o
2466    row 4  |. . . d d d o o o o  o  o
2467    row 5  |. . . d d d o o o o  o  o
2468           --------------------------
2469 .ve
2470 
2471    Thus, any entries in the d locations are stored in the d (diagonal)
2472    submatrix, and any entries in the o locations are stored in the
2473    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2474    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2475 
2476    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2477    plus the diagonal part of the d matrix,
2478    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2479    In general, for PDE problems in which most nonzeros are near the diagonal,
2480    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2481    or you will get TERRIBLE performance; see the users' manual chapter on
2482    matrices.
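
   For example, a sketch of creating a distributed matrix with block size 2 (the global sizes M and N
   and the per-block-row counts 5 and 2 are hypothetical):
.vb
   Mat A;
   ierr = MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve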
2483 
2484    Level: intermediate
2485 
2486 .keywords: matrix, block, aij, compressed row, sparse, parallel
2487 
2488 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2489 @*/
2490 
2491 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2492 {
2493   PetscErrorCode ierr;
2494   PetscMPIInt    size;
2495 
2496   PetscFunctionBegin;
2497   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2498   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2499   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2500   if (size > 1) {
2501     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2502     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2503   } else {
2504     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2505     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2506   }
2507   PetscFunctionReturn(0);
2508 }
2509 
2510 
2511 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2512 {
2513   Mat            mat;
2514   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2515   PetscErrorCode ierr;
2516   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2517   PetscScalar    *array;
2518 
2519   PetscFunctionBegin;
2520   *newmat = 0;
2521 
2522   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2523   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2524   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2525   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2526   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2527   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2528 
2529   mat->factortype   = matin->factortype;
2530   mat->preallocated = PETSC_TRUE;
2531   mat->assembled    = PETSC_TRUE;
2532   mat->insertmode   = NOT_SET_VALUES;
2533 
2534   a      = (Mat_MPISBAIJ*)mat->data;
2535   a->bs2 = oldmat->bs2;
2536   a->mbs = oldmat->mbs;
2537   a->nbs = oldmat->nbs;
2538   a->Mbs = oldmat->Mbs;
2539   a->Nbs = oldmat->Nbs;
2540 
2541 
2542   a->size         = oldmat->size;
2543   a->rank         = oldmat->rank;
2544   a->donotstash   = oldmat->donotstash;
2545   a->roworiented  = oldmat->roworiented;
2546   a->rowindices   = 0;
2547   a->rowvalues    = 0;
2548   a->getrowactive = PETSC_FALSE;
2549   a->barray       = 0;
2550   a->rstartbs     = oldmat->rstartbs;
2551   a->rendbs       = oldmat->rendbs;
2552   a->cstartbs     = oldmat->cstartbs;
2553   a->cendbs       = oldmat->cendbs;
2554 
2555   /* hash table stuff */
2556   a->ht           = 0;
2557   a->hd           = 0;
2558   a->ht_size      = 0;
2559   a->ht_flag      = oldmat->ht_flag;
2560   a->ht_fact      = oldmat->ht_fact;
2561   a->ht_total_ct  = 0;
2562   a->ht_insert_ct = 0;
2563 
2564   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2565   if (oldmat->colmap) {
2566 #if defined(PETSC_USE_CTABLE)
2567     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2568 #else
2569     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2570     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2571     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2572 #endif
2573   } else a->colmap = 0;
2574 
2575   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2576     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2577     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2578     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2579   } else a->garray = 0;
2580 
2581   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2582   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2583   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2584   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2585   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2586 
2587   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2588   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2589   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2590   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2591 
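  /* slvec1a/slvec1b (and slvec0b) are views into the storage of slvec1 (resp. slvec0):
     the first bs*mbs entries hold the local part, the remaining entries the ghost part */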
2592   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2593   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2594   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2595   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2596   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2597   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2598   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2599   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2602   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2603   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2604   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2605 
2606   /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet; instead take a reference to the old scatter (the lazy alternative): */
2607   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2608   a->sMvctx = oldmat->sMvctx;
2609   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2610 
2611   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2612   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2613   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2614   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2615   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2616   *newmat = mat;
2617   PetscFunctionReturn(0);
2618 }
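
/*
   A minimal usage sketch for the duplication routine above (compile-disabled;
   the helper name is illustrative). MAT_COPY_VALUES also copies the numerical
   values, while MAT_DO_NOT_COPY_VALUES duplicates only the nonzero structure.
*/
#if 0
static PetscErrorCode ExampleDuplicateSBAIJ(Mat A)
{
  PetscErrorCode ierr;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  /* ... use B independently of A ... */
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif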
2619 
2620 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2621 {
2622   PetscErrorCode ierr;
2623   PetscInt       i,nz,j,rstart,rend;
2624   PetscScalar    *vals,*buf;
2625   MPI_Comm       comm;
2626   MPI_Status     status;
2627   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2628   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2629   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2630   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2631   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2632   PetscInt       dcount,kmax,k,nzcount,tmp;
2633   int            fd;
2634 
2635   PetscFunctionBegin;
2636   /* force binary viewer to load .info file if it has not yet done so */
2637   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2638   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2639   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2640   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2641   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2642   if (bs < 0) bs = 1;
2643 
2644   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2645   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2646   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2647   if (!rank) {
2648     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2649     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2650     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2651   }
2652 
2653   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2654   M    = header[1];
2655   N    = header[2];
2656 
2657   /* If global sizes are set, check if they are consistent with that given in the file */
2658   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2659   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2660 
2661   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2662 
2663   /*
2664      This code adds extra rows to make sure the number of rows is
2665      divisible by the blocksize
2666   */
2667   Mbs        = M/bs;
2668   extra_rows = bs - M + bs*(Mbs);
2669   if (extra_rows == bs) extra_rows = 0;
2670   else                  Mbs++;
2671   if (extra_rows && !rank) {
2672     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2673   }
2674 
2675   /* determine ownership of all rows */
2676   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2677     mbs = Mbs/size + ((Mbs % size) > rank);
2678     m   = mbs*bs;
2679   } else { /* User Set */
2680     m   = newmat->rmap->n;
2681     mbs = m/bs;
2682   }
2683   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2684   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2685   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2686   rowners[0] = 0;
2687   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2688   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2689   rstart = rowners[rank];
2690   rend   = rowners[rank+1];
2691 
2692   /* distribute row lengths to all processors */
2693   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2694   if (!rank) {
2695     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2696     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2697     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2698     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2699     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2700     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2701     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2702   } else {
2703     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2704   }
2705 
2706   if (!rank) {   /* procs[0] */
2707     /* calculate the number of nonzeros on each processor */
2708     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2709     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2710     for (i=0; i<size; i++) {
2711       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2712         procsnz[i] += rowlengths[j];
2713       }
2714     }
2715     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2716 
2717     /* determine max buffer needed and allocate it */
2718     maxnz = 0;
2719     for (i=0; i<size; i++) {
2720       maxnz = PetscMax(maxnz,procsnz[i]);
2721     }
2722     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2723 
2724     /* read in my part of the matrix column indices  */
2725     nz     = procsnz[0];
2726     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2727     mycols = ibuf;
2728     if (size == 1) nz -= extra_rows;
2729     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2730     if (size == 1) {
2731       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2732     }
2733 
2734     /* read in everyone else's column indices (except the last proc's) and ship them off */
2735     for (i=1; i<size-1; i++) {
2736       nz   = procsnz[i];
2737       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2738       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2739     }
2740     /* read in the portion for the last proc */
2741     if (size != 1) {
2742       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2743       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2744       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2745       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2746     }
2747     ierr = PetscFree(cols);CHKERRQ(ierr);
2748   } else {  /* procs[i], i>0 */
2749     /* determine buffer space needed for message */
2750     nz = 0;
2751     for (i=0; i<m; i++) nz += locrowlens[i];
2752     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2753     mycols = ibuf;
2754     /* receive message of column indices */
2755     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2756     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2757     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2758   }
2759 
2760   /* loop over local rows, determining the number of off-diagonal entries */
2761   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2762   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2763   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2764   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2765   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2766   rowcount = 0;
2767   nzcount  = 0;
2768   for (i=0; i<mbs; i++) {
2769     dcount  = 0;
2770     odcount = 0;
2771     for (j=0; j<bs; j++) {
2772       kmax = locrowlens[rowcount];
2773       for (k=0; k<kmax; k++) {
2774         tmp = mycols[nzcount++]/bs; /* block col. index */
2775         if (!mask[tmp]) {
2776           mask[tmp] = 1;
2777           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2778           else masked1[dcount++] = tmp; /* entry in diag portion */
2779         }
2780       }
2781       rowcount++;
2782     }
2783 
2784     dlens[i]  = dcount;  /* d_nzz[i] */
2785     odlens[i] = odcount; /* o_nzz[i] */
2786 
2787     /* zero out the mask elements we set */
2788     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2789     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2790   }
2791   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2792   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2793   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2794 
2795   if (!rank) {
2796     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2797     /* read in my part of the matrix numerical values  */
2798     nz     = procsnz[0];
2799     vals   = buf;
2800     mycols = ibuf;
2801     if (size == 1) nz -= extra_rows;
2802     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2803     if (size == 1) {
2804       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2805     }
2806 
2807     /* insert into matrix */
2808     jj = rstart*bs;
2809     for (i=0; i<m; i++) {
2810       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2811       mycols += locrowlens[i];
2812       vals   += locrowlens[i];
2813       jj++;
2814     }
2815 
2816     /* read in the other processors' values (except the last one's) and ship them out */
2817     for (i=1; i<size-1; i++) {
2818       nz   = procsnz[i];
2819       vals = buf;
2820       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2821       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2822     }
2823     /* the last proc */
2824     if (size != 1) {
2825       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2826       vals = buf;
2827       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2828       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2829       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2830     }
2831     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2832 
2833   } else {
2834     /* receive numeric values */
2835     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2836 
2837     /* receive message of values */
2838     vals   = buf;
2839     mycols = ibuf;
2840     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2841     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2842     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2843 
2844     /* insert into matrix */
2845     jj = rstart*bs;
2846     for (i=0; i<m; i++) {
2847       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2848       mycols += locrowlens[i];
2849       vals   += locrowlens[i];
2850       jj++;
2851     }
2852   }
2853 
2854   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2855   ierr = PetscFree(buf);CHKERRQ(ierr);
2856   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2857   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2858   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2859   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2860   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2861   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2862   PetscFunctionReturn(0);
2863 }
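
/*
   A minimal usage sketch for the loader above (compile-disabled; the file name
   "matrix.dat" and block size 2 are illustrative assumptions). The file is
   assumed to hold a matrix previously written with MatView() on a binary
   viewer; the block size may equivalently be set with -matload_block_size 2.
*/
#if 0
static PetscErrorCode ExampleLoadSBAIJ(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSize(*A,2);CHKERRQ(ierr);
  ierr = MatLoad(*A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif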
2864 
2865 /*XXXXX@
2866    MatMPISBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
2867 
2868    Not Collective on Mat, each process can have a different hash factor
2869 
2870    Input Parameters:
2871 +  mat  - the matrix
2872 -  fact - the hash table factor
2873 
2874    Level: advanced
2875 
2876   Notes:
2877    This can also be set by the command line option: -mat_use_hash_table fact
2878 
2879 .keywords: matrix, hashtable, factor, HT
2880 
2881 .seealso: MatSetOption()
2882 @XXXXX*/
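
/*
   A hedged sketch of enabling the assembly hash table documented above
   (compile-disabled; the helper name and the factor value 1.6 are
   illustrative). The option is set before the values are inserted, and the
   factor itself can be supplied on the command line:
       -mat_use_hash_table 1.6
*/
#if 0
static PetscErrorCode ExampleEnableHashTable(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif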
2883 
2884 
2885 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2886 {
2887   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2888   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2889   PetscReal      atmp;
2890   PetscReal      *work,*svalues,*rvalues;
2891   PetscErrorCode ierr;
2892   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2893   PetscMPIInt    rank,size;
2894   PetscInt       *rowners_bs,dest,count,source;
2895   PetscScalar    *va;
2896   MatScalar      *ba;
2897   MPI_Status     stat;
2898 
2899   PetscFunctionBegin;
2900   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Returning row indices (idx) is not supported for MPISBAIJ; send email to petsc-maint@mcs.anl.gov");
2901   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2902   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2903 
2904   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2905   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2906 
2907   bs  = A->rmap->bs;
2908   mbs = a->mbs;
2909   Mbs = a->Mbs;
2910   ba  = b->a;
2911   bi  = b->i;
2912   bj  = b->j;
2913 
2914   /* find ownerships */
2915   rowners_bs = A->rmap->range;
2916 
2917   /* each proc creates an array to be distributed */
2918   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2919   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2920 
2921   /* row_max for B (the off-diagonal, upper-triangle part); the last process owns no such part */
2922   if (rank != size-1) {
2923     for (i=0; i<mbs; i++) {
2924       ncols = bi[1] - bi[0]; bi++;
2925       brow  = bs*i;
2926       for (j=0; j<ncols; j++) {
2927         bcol = bs*(*bj);
2928         for (kcol=0; kcol<bs; kcol++) {
2929           col  = bcol + kcol;                /* local col index */
2930           col += rowners_bs[rank+1];      /* global col index */
2931           for (krow=0; krow<bs; krow++) {
2932             atmp = PetscAbsScalar(*ba); ba++;
2933             row  = brow + krow;   /* local row index */
2934             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2935             if (work[col] < atmp) work[col] = atmp;
2936           }
2937         }
2938         bj++;
2939       }
2940     }
2941 
2942     /* send values to their owners */
2943     for (dest=rank+1; dest<size; dest++) {
2944       svalues = work + rowners_bs[dest];
2945       count   = rowners_bs[dest+1]-rowners_bs[dest];
2946       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2947     }
2948   }
2949 
2950   /* receive values */
2951   if (rank) {
2952     rvalues = work;
2953     count   = rowners_bs[rank+1]-rowners_bs[rank];
2954     for (source=0; source<rank; source++) {
2955       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2956       /* process values */
2957       for (i=0; i<count; i++) {
2958         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2959       }
2960     }
2961   }
2962 
2963   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2964   ierr = PetscFree(work);CHKERRQ(ierr);
2965   PetscFunctionReturn(0);
2966 }
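
/*
   A minimal sketch of calling the routine above through the public interface
   (compile-disabled; the helper name is illustrative). As checked above, idx
   must be NULL for MPISBAIJ; on return v(i) = max_j |A(i,j)|.
*/
#if 0
static PetscErrorCode ExampleRowMaxAbs(Mat A)
{
  PetscErrorCode ierr;
  Vec            v;

  PetscFunctionBegin;
  ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(A,v,NULL);CHKERRQ(ierr);
  ierr = VecDestroy(&v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif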
2967 
2968 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2969 {
2970   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2971   PetscErrorCode    ierr;
2972   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2973   PetscScalar       *x,*ptr,*from;
2974   Vec               bb1;
2975   const PetscScalar *b;
2976 
2977   PetscFunctionBegin;
2978   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2979   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2980 
2981   if (flag == SOR_APPLY_UPPER) {
2982     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2983     PetscFunctionReturn(0);
2984   }
2985 
2986   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2987     if (flag & SOR_ZERO_INITIAL_GUESS) {
2988       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2989       its--;
2990     }
2991 
2992     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2993     while (its--) {
2994 
2995       /* lower triangular part: slvec0b = - B^T*xx */
2996       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2997 
2998       /* copy xx into slvec0a */
2999       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3000       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3001       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3002       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3003 
3004       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
3005 
3006       /* copy bb into slvec1a */
3007       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3008       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3009       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3010       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3011 
3012       /* set slvec1b = 0 */
3013       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3014 
3015       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3016       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3017       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3018       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3019 
3020       /* upper triangular part: bb1 = slvec1a - B*x (slvec1b holds the negated ghost values of x) */
3021       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
3022 
3023       /* local diagonal sweep */
3024       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3025     }
3026     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3027   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3028     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3029   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3030     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3031   } else if (flag & SOR_EISENSTAT) {
3032     Vec               xx1;
3033     PetscBool         hasop;
3034     const PetscScalar *diag;
3035     PetscScalar       *sl,scale = (omega - 2.0)/omega;
3036     PetscInt          i,n;
3037 
3038     if (!mat->xx1) {
3039       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
3040       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
3041     }
3042     xx1 = mat->xx1;
3043     bb1 = mat->bb1;
3044 
3045     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
3046 
3047     if (!mat->diag) {
3048       /* caution: the cached diagonal is not recomputed if the same matrix gets new nonzero values */
3049       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
3050       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
3051     }
3052     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
3053 
3054     if (hasop) {
3055       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
3056       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3057     } else {
3058       /*
3059           These two lines are replaced by the hand-unrolled code below, which may be a bit faster with a good compiler:
3060       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
3061       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3062       */
3063       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3064       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3065       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3066       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3067       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
3068       if (omega == 1.0) {
3069         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
3070         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
3071       } else {
3072         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
3073         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
3074       }
3075       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3076       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3077       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3078       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3079     }
3080 
3081     /* multiply off-diagonal portion of matrix */
3082     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3083     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3084     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
3085     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3086     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3087     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
3088     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3089     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3090     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3091     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
3092 
3093     /* local sweep */
3094     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
3095     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
3096   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3097   PetscFunctionReturn(0);
3098 }
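
/*
   A hedged sketch of reaching the SOR kernel above through MatSOR()
   (compile-disabled; the sweep type, omega, and iteration counts are
   illustrative). As checked above, only bs = 1 is supported; PCSOR
   (-pc_type sor) drives MatSOR() in the same way.
*/
#if 0
static PetscErrorCode ExampleSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* one local symmetric sweep with omega = 1.0 and a zero initial guess */
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif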
3099 
3100 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3101 {
3102   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
3103   PetscErrorCode ierr;
3104   Vec            lvec1,bb1;
3105 
3106   PetscFunctionBegin;
3107   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3108   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3109 
3110   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3111     if (flag & SOR_ZERO_INITIAL_GUESS) {
3112       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3113       its--;
3114     }
3115 
3116     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
3117     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3118     while (its--) {
3119       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3120 
3121       /* lower triangular part: bb1 = bb - B^T*xx */
3122       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3123       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3124 
3125       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3126       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3127       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3128 
3129       /* upper triangular part: bb1 = bb1 - B*x */
3130       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3131       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3132 
3133       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3134 
3135       /* diagonal sweep */
3136       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3137     }
3138     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3139     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3140   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3141   PetscFunctionReturn(0);
3142 }
3143 
3144 /*@
3145      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
3146          in standard CSR format.
3147 
3148    Collective on MPI_Comm
3149 
3150    Input Parameters:
3151 +  comm - MPI communicator
3152 .  bs - the block size, only a block size of 1 is supported
3153 .  m - number of local rows (cannot be PETSC_DECIDE)
3154 .  n - number of local columns; this should be the same as the local size used in creating the
3155        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3156        calculated if N is given); for square matrices n is almost always m
3157 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3158 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3159 .  i - row indices
3160 .  j - column indices
3161 -  a - matrix values
3162 
3163    Output Parameter:
3164 .   mat - the matrix
3165 
3166    Level: intermediate
3167 
3168    Notes:
3169        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3170      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3171      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3172 
3173        The i and j indices are 0 based, and the i offsets index into the local j (and a) arrays.
3174 
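   Example usage:
   A sketch assuming exactly two MPI ranks and illustrative 4x4 tridiagonal
   data; rank 0 passes rows 0-1 via (i0,j0,a0) and rank 1 passes rows 2-3 via
   (i1,j1,a1), each giving only the upper triangle of its rows with bs = 1:
.vb
   Mat         A;
   PetscMPIInt rank;
   PetscInt    i0[] = {0,2,4},j0[] = {0,1,1,2};
   PetscInt    i1[] = {0,2,3},j1[] = {2,3,3};
   PetscScalar a0[] = {2.0,-1.0,2.0,-1.0},a1[] = {2.0,-1.0,2.0};

   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
   if (!rank) {
     ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,PETSC_DECIDE,4,4,i0,j0,a0,&A);CHKERRQ(ierr);
   } else {
     ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,PETSC_DECIDE,4,4,i1,j1,a1,&A);CHKERRQ(ierr);
   }
.ve
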
3175 .keywords: matrix, aij, compressed row, sparse, parallel
3176 
3177 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3178           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3179 @*/
3180 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3181 {
3182   PetscErrorCode ierr;
3183 
3184 
3185   PetscFunctionBegin;
3186   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3187   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
3188   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3189   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3190   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3191   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3192   PetscFunctionReturn(0);
3193 }
3194 
3195 
3196 /*@C
3197    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
3198    using the given nonzero structure and (optional) numerical values.
3199 
3200    Collective on MPI_Comm
3201 
3202    Input Parameters:
3203 +  B - the matrix
3204 .  bs - the block size
3205 .  i - the indices into j for the start of each local row (starts with zero)
3206 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
3207 -  v - optional values in the matrix
3208 
3209    Level: developer
3210 
3211 .keywords: matrix, aij, compressed row, sparse, parallel
3212 
3213 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3214 @*/
3215 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3216 {
3217   PetscErrorCode ierr;
3218 
3219   PetscFunctionBegin;
3220   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3221   PetscFunctionReturn(0);
3222 }
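
/*
   A hedged sketch of calling MatMPISBAIJSetPreallocationCSR() directly
   (compile-disabled; the helper name and the block size of 1 are
   illustrative). This mirrors what MatCreateMPISBAIJWithArrays() does above;
   the local CSR triple (i,j,v) is assumed to describe the upper triangle of
   the locally owned rows of a square matrix.
*/
#if 0
static PetscErrorCode ExamplePreallocateCSR(MPI_Comm comm,PetscInt m,PetscInt M,const PetscInt i[],const PetscInt j[],const PetscScalar v[],Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,M,M);CHKERRQ(ierr);
  ierr = MatSetType(*B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*B,1,i,j,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif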
3223 
3224 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3225 {
3226   PetscErrorCode ierr;
3227   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3228   PetscInt       *indx;
3229   PetscScalar    *values;
3230 
3231   PetscFunctionBegin;
3232   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3233   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3234     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3235     PetscInt       *dnz,*onz,sum,mbs,Nbs; /* reuse the outer bs,cbs rather than shadowing them */
3236     PetscInt       *bindx,rmax=a->rmax,j;
3237 
3238     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3239     mbs = m/bs; Nbs = N/cbs;
3240     if (n == PETSC_DECIDE) {
3241       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3242     }
3243     /* Check sum(n) = Nbs */
3244     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3245     if (sum != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",sum,Nbs);
3246 
3247     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3248     rstart -= mbs;
3249 
3250     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3251     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3252     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3253     for (i=0; i<mbs; i++) {
3254       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3255       nnz = nnz/bs;
3256       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3257       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3258       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3259     }
3260     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3261     ierr = PetscFree(bindx);CHKERRQ(ierr);
3262 
3263     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3264     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3265     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3266     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3267     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3268     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3269   }
3270 
3271   /* numeric phase */
3272   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3273   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3274 
3275   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3276   for (i=0; i<m; i++) {
3277     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3278     Ii   = i + rstart;
3279     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3280     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3281   }
3282   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3283   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3284   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3285   PetscFunctionReturn(0);
3286 }
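
/*
   A hedged sketch for the concatenation routine above, reached through the
   dispatching MatCreateMPIMatConcatenateSeqMat() interface (compile-disabled;
   the helper name is illustrative). Each process contributes one sequential
   SBAIJ matrix and the pieces are stacked row-wise into a parallel matrix.
*/
#if 0
static PetscErrorCode ExampleConcatenateSeqSBAIJ(MPI_Comm comm,Mat seqsbaij,Mat *mpisbaij)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateMPIMatConcatenateSeqMat(comm,seqsbaij,PETSC_DECIDE,MAT_INITIAL_MATRIX,mpisbaij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif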
3287