xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 3aa2d9e3a17455108487be9a174c0f069d9014ad)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #undef __FUNCT__
11 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
12 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
19   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
20   PetscFunctionReturn(0);
21 }
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
25 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
32   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
/* Insert or add a single scalar value into the "diagonal" (A) SeqSBAIJ part.
   row/col are local point indices, orow/ocol the global indices (used only in
   error messages).  Jumps to a_noinsert when the entry already exists or when
   new nonzeros are suppressed (a->nonew == 1).  Relies on the caller having
   rp/ap/bap/low/high/t/_i/ii/N/brow/bcol/ridx/cidx/rmax/nrow in scope. */
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    /* binary search narrows [low,high) to at most 3 candidate blocks */ \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block already allocated: update its (ridx,cidx) entry in place */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; /* silently drop new nonzeros */ \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }
75 
/* Insert or add a single scalar value into the off-diagonal (B) SeqBAIJ part.
   Mirrors MatSetValues_SeqSBAIJ_A_Private but walks B's structures
   (bi/bj/ba/bimax/bilen) and has no triangular restriction, since B holds
   only columns owned by other processes. */
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    /* binary search narrows [low,high) to at most 3 candidate blocks */ \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block already allocated: update its (ridx,cidx) entry in place */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; /* silently drop new nonzeros */ \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }
114 
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
/*
   Point-wise MatSetValues() for MPISBAIJ.

   im[]/in[] are global point (not block) indices; v holds m*n values laid out
   row- or column-oriented according to baij->roworiented.  Entries whose block
   row exceeds the block column lie in the unstored lower triangle: they are
   skipped when a->ignore_ltriangular is set and raise an error otherwise.
   Locally owned rows go directly into the sequential A (diagonal) or B
   (off-diagonal) matrices via the macros above; rows owned by other processes
   are filtered of lower-triangular entries and pushed into the stash for
   communication at assembly time.
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the scratch arrays used to filter off-process rows, if needed */
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* translate the global block column to B's compressed local numbering */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* brand-new off-diagonal block: revert to global numbering and
                 refresh the cached B pointers, which disassembly replaces */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        /* keep only the upper-triangular entries before stashing */
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
237 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
/*
   Insert or add one bs-by-bs block of values v at block position (row,col)
   of the sequential SBAIJ matrix A.  Only the upper triangle (col >= row) is
   stored; lower-triangular blocks are skipped or rejected according to
   a->ignore_ltriangular.  orow/ocol are global block indices used only in
   error messages.  Blocks are stored column-major within each bs2 slab, so
   row-oriented input is transposed on the fly.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp   = aj + ai[row];     /* block-column indices of this block row */
  ap   = aa + bs2*ai[row]; /* first stored value of this block row */
  rmax = imax[row];        /* allocated blocks in this row */
  nrow = ailen[row];       /* used blocks in this row */
  value = v;
  low   = 0;
  high  = nrow;

  /* binary search for col, then linear scan of the few remaining candidates */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: accumulate or overwrite in place */
      bap = ap +  bs2*i;
      if (roworiented) {
        /* input is row-major, storage column-major: stride through bap by bs */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* input already column-major: straight sequential copy/accumulate */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++  = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new blocks silently dropped */
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  /* copy the new block in (storage was just zeroed, so INSERT suffices) */
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
336 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
/*
   This routine is exactly duplicated in mpibaij.c

   Insert or add one bs-by-bs block of values v at block position (row,col)
   of the sequential BAIJ matrix A (used here for the off-diagonal B part, so
   there is no triangular restriction).  orow/ocol are global block indices
   used only in error messages.  Blocks are stored column-major within each
   bs2 slab; row-oriented input is transposed on the fly.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];     /* block-column indices of this block row */
  ap   = aa + bs2*ai[row]; /* first stored value of this block row */
  rmax = imax[row];        /* allocated blocks in this row */
  nrow = ailen[row];       /* used blocks in this row */
  low  = 0;
  high = nrow;
  value = v;
  /* binary search for col, then linear scan of the few remaining candidates */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: accumulate or overwrite in place */
      bap = ap +  bs2*i;
      if (roworiented) {
        /* input is row-major, storage column-major: stride through bap by bs */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* input already column-major: copy one column of the block at a time */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new blocks silently dropped */
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  /* copy the new block in (storage was just zeroed, so INSERT suffices) */
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
435 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()

    im[]/in[] are global BLOCK indices; v holds m*n blocks of bs*bs values.
    Lower-triangular blocks (im[i] > in[j]) are skipped or rejected per the
    ignore_ltriangular option.  cstart/cend are taken from the block ROW range
    (rstartbs/rendbs); SBAIJ matrices are square with matching row/column
    layouts, so row and column ownership ranges coincide.
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     =baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* persistent bs2-sized scratch block, reused across calls */
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* scalar distance between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the (i,j) block of v into the contiguous scratch barray */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2; /* rewind to the start of the gathered block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            /* translate the global block column to B's compressed numbering */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* NOTE(review): A's data is Mat_SeqSBAIJ; this Mat_SeqBAIJ cast
                 relies on nonew lying at the same offset in both structs -- confirm */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        /* off-process block row: stash for communication at assembly time */
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
545 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
/*
   Fetch m*n values at global point indices idxm[] x idxn[] into v
   (row-major).  Only locally owned rows may be queried.  Columns owned by
   this process are read from the diagonal block A; other columns are mapped
   through colmap/garray and read from B, with block columns absent from B
   returned as 0.0.
*/
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart; /* local point row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
          /* map the global block column to B's compressed local numbering */
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column not stored in B: the entry is structurally zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
588 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
/*
   Norms of a distributed symmetric matrix.  Only the upper triangle is
   stored, so the Frobenius norm doubles the off-diagonal (B) contribution,
   and NORM_1 and NORM_INFINITY coincide (max column sum == max row sum):
   each stored value is accumulated into both its row and its column sum.
*/
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: the sequential A holds the whole matrix */
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      /* B is counted twice: its transpose image in the lower triangle is not stored */
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      /* rsum accumulates local contributions to global column sums; rsum2 receives the reduction */
      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              /* NOTE(review): bcol > 0 treats the first stored block of each row
                 as the diagonal block -- confirm SeqSBAIJ always stores the
                 diagonal block first */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat: every block is off-diagonal, so each value feeds both sums */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
663 
664 #undef __FUNCT__
665 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
666 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
667 {
668   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
669   PetscErrorCode ierr;
670   PetscInt       nstash,reallocs;
671 
672   PetscFunctionBegin;
673   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
674 
675   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
676   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
677   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
678   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
679   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
680   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
/*
   Finish assembly: drain the point stash and block stash (replaying received
   entries through MatSetValues[Blocked]_MPISBAIJ), assemble the local A and
   B matrices, synchronize disassembly state across processes, and build the
   scatter contexts on the first final assembly.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash &&  !mat->nooffprocentries) {
    /* drain the point stash: each message holds runs of values sharing a row */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: true only if every rank remained assembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  /* discard the cached MatGetRow() work arrays */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    /* NOTE(review): A's data is Mat_SeqSBAIJ; this Mat_SeqBAIJ cast assumes
       nonew lies at the same offset in both structs -- confirm */
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
785 
786 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
787 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
788 #include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
/*
   Viewer backend for MPISBAIJ matrices.

   For ASCII info formats it prints per-process nonzero statistics and returns.
   Otherwise it gathers the ENTIRE matrix onto process 0 (so this is only
   suitable for small matrices) and views that sequential copy.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process nonzero counts for the whole local part and each block */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs consecutive global row indices of one block row */
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        /* insert the bs x bs block one column at a time; 'a' walks the values */
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps compressed off-diagonal block columns to global blocks */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          /* NOTE(review): uses the BAIJ setter here, presumably to bypass the
             SBAIJ lower-triangular filtering for off-diagonal entries -- verify */
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* on rank 0 the gathered matrix holds everything in its diagonal block */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
908 
909 #undef __FUNCT__
910 #define __FUNCT__ "MatView_MPISBAIJ"
911 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
912 {
913   PetscErrorCode ierr;
914   PetscBool      iascii,isdraw,issocket,isbinary;
915 
916   PetscFunctionBegin;
917   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
918   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
919   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
920   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
921   if (iascii || isdraw || issocket || isbinary) {
922     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
923   }
924   PetscFunctionReturn(0);
925 }
926 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
/*
   Destroy an MPISBAIJ matrix: release the stashes, the sequential blocks,
   all work vectors/scatters, the auxiliary arrays, and finally detach the
   type name and composed functions from the PetscObject.
*/
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* stashes used for off-process MatSetValues during assembly */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  /* sequential diagonal (A) and off-diagonal (B) blocks */
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* work vectors and scatters used by the mat-vec kernels */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  /* scratch storage for MatGetRow() */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* remove the type name and all functions composed on this object */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
981 
982 #undef __FUNCT__
983 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
984 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
985 {
986   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
987   PetscErrorCode    ierr;
988   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
989   PetscScalar       *from;
990   const PetscScalar *x;
991 
992   PetscFunctionBegin;
993   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
994   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
995 
996   /* diagonal part */
997   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
998   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
999 
1000   /* subdiagonal part */
1001   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1002 
1003   /* copy x into the vec slvec0 */
1004   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1005   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1006 
1007   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1008   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1009   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1010 
1011   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1012   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1013   /* supperdiagonal part */
1014   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatMult_MPISBAIJ"
1020 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1021 {
1022   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1023   PetscErrorCode    ierr;
1024   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1025   PetscScalar       *from;
1026   const PetscScalar *x;
1027 
1028   PetscFunctionBegin;
1029   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1030   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1031 
1032   /* diagonal part */
1033   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1034   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1035 
1036   /* subdiagonal part */
1037   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1038 
1039   /* copy x into the vec slvec0 */
1040   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1041   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1042 
1043   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1044   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1045   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1046 
1047   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1048   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1049   /* supperdiagonal part */
1050   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1056 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1057 {
1058   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1059   PetscErrorCode ierr;
1060   PetscInt       nt;
1061 
1062   PetscFunctionBegin;
1063   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1064   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1065 
1066   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1067   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1068 
1069   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1070   /* do diagonal part */
1071   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1072   /* do supperdiagonal part */
1073   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1074   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1075   /* do subdiagonal part */
1076   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1077   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1078   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1084 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1085 {
1086   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1087   PetscErrorCode    ierr;
1088   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1089   PetscScalar       *from,zero=0.0;
1090   const PetscScalar *x;
1091 
1092   PetscFunctionBegin;
1093   /*
1094   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1095   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1096   */
1097   /* diagonal part */
1098   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1099   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1100 
1101   /* subdiagonal part */
1102   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1103 
1104   /* copy x into the vec slvec0 */
1105   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1106   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1107   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1108   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1109 
1110   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1111   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1112   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1113 
1114   /* supperdiagonal part */
1115   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
/*
   zz = A*xx + yy using the two-communication variant: a forward scatter
   overlapped with the local diagonal multadd, then a reverse scatter that
   accumulates the subdiagonal (transposed) contribution into zz.
*/
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do supperdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1140 
1141 /*
1142   This only works correctly for square matrices where the subblock A->A is the
1143    diagonal block
1144 */
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1147 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1148 {
1149   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1154   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1155   PetscFunctionReturn(0);
1156 }
1157 
1158 #undef __FUNCT__
1159 #define __FUNCT__ "MatScale_MPISBAIJ"
1160 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1161 {
1162   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1163   PetscErrorCode ierr;
1164 
1165   PetscFunctionBegin;
1166   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1167   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1168   PetscFunctionReturn(0);
1169 }
1170 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
/*
   Return one locally-owned global row as merged (column,value) lists from the
   diagonal (A) and off-diagonal (B) blocks, in increasing global column order.

   The merged data is returned in the per-matrix scratch arrays rowvalues and
   rowindices, which are sized for the longest local row on first use; the
   caller must pair this with MatRestoreRow_MPISBAIJ().
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  /* only fetch the parts the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global column precedes the
         diagonal block; layout is then [B-left | A | B-right] */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1254 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1255 {
1256   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1257 
1258   PetscFunctionBegin;
1259   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1260   baij->getrowactive = PETSC_FALSE;
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #undef __FUNCT__
1265 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1266 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1267 {
1268   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1269   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1270 
1271   PetscFunctionBegin;
1272   aA->getrow_utriangular = PETSC_TRUE;
1273   PetscFunctionReturn(0);
1274 }
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1277 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1278 {
1279   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1280   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1281 
1282   PetscFunctionBegin;
1283   aA->getrow_utriangular = PETSC_FALSE;
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 #undef __FUNCT__
1288 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1289 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1290 {
1291   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1292   PetscErrorCode ierr;
1293 
1294   PetscFunctionBegin;
1295   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1296   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 #undef __FUNCT__
1301 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1302 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1303 {
1304   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1305   PetscErrorCode ierr;
1306 
1307   PetscFunctionBegin;
1308   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1309   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
1314    Input: isrow       - distributed(parallel),
1315           iscol_local - locally owned (seq)
1316 */
1317 #undef __FUNCT__
1318 #define __FUNCT__ "ISEqual_private"
1319 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1320 {
1321   PetscErrorCode ierr;
1322   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1323   const PetscInt *ptr1,*ptr2;
1324 
1325   PetscFunctionBegin;
1326   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1327   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1328   if (sz1 > sz2) {
1329     *flg = PETSC_FALSE;
1330     PetscFunctionReturn(0);
1331   }
1332 
1333   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1334   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1335 
1336   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1337   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1338   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1339   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1340   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1341   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1342 
1343   nmatch=0;
1344   k     = 0;
1345   for (i=0; i<sz1; i++){
1346     for (j=k; j<sz2; j++){
1347       if (a1[i] == a2[j]) {
1348         k = j; nmatch++;
1349         break;
1350       }
1351     }
1352   }
1353   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1354   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1355   ierr = PetscFree(a1);CHKERRQ(ierr);
1356   ierr = PetscFree(a2);CHKERRQ(ierr);
1357   if (nmatch < sz1) {
1358     *flg = PETSC_FALSE;
1359   } else {
1360     *flg = PETSC_TRUE;
1361   }
1362   PetscFunctionReturn(0);
1363 }
1364 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
/*
   Extract a (symmetric) submatrix.  The symmetric format requires the row and
   column index sets to select the same entries, so iscol is gathered onto
   every process and checked against isrow before delegating to the BAIJ code.
   On MAT_INITIAL_MATRIX the gathered IS is stashed on the new matrix so a
   later MAT_REUSE_MATRIX call can retrieve it.
*/
PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* reuse the gathered column IS composed on the matrix by the initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatGetSubMatrix_MPIBAIJ() */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* compose keeps a reference, so the local destroy is safe */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1393 
1394 #undef __FUNCT__
1395 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1396 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1397 {
1398   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1399   PetscErrorCode ierr;
1400 
1401   PetscFunctionBegin;
1402   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1403   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
/*
   Gather matrix statistics: the local A and B block infos are summed, then,
   depending on flag, returned as-is (MAT_LOCAL) or reduced across the
   communicator with max (MAT_GLOBAL_MAX) or sum (MAT_GLOBAL_SUM).
*/
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  /* pack the five reducible fields of the diagonal block */
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  /* accumulate the off-diagonal block on top */
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1459 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1460 {
1461   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1462   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1463   PetscErrorCode ierr;
1464 
1465   PetscFunctionBegin;
1466   switch (op) {
1467   case MAT_NEW_NONZERO_LOCATIONS:
1468   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1469   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1470   case MAT_KEEP_NONZERO_PATTERN:
1471   case MAT_NEW_NONZERO_LOCATION_ERR:
1472     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1473     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1474     break;
1475   case MAT_ROW_ORIENTED:
1476     a->roworiented = flg;
1477 
1478     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1479     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1480     break;
1481   case MAT_NEW_DIAGONALS:
1482     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1483     break;
1484   case MAT_IGNORE_OFF_PROC_ENTRIES:
1485     a->donotstash = flg;
1486     break;
1487   case MAT_USE_HASH_TABLE:
1488     a->ht_flag = flg;
1489     break;
1490   case MAT_HERMITIAN:
1491     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1492     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1493 
1494     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1495     break;
1496   case MAT_SPD:
1497     A->spd_set = PETSC_TRUE;
1498     A->spd     = flg;
1499     if (flg) {
1500       A->symmetric                  = PETSC_TRUE;
1501       A->structurally_symmetric     = PETSC_TRUE;
1502       A->symmetric_set              = PETSC_TRUE;
1503       A->structurally_symmetric_set = PETSC_TRUE;
1504     }
1505     break;
1506   case MAT_SYMMETRIC:
1507     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1508     break;
1509   case MAT_STRUCTURALLY_SYMMETRIC:
1510     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1511     break;
1512   case MAT_SYMMETRY_ETERNAL:
1513     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1514     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1515     break;
1516   case MAT_IGNORE_LOWER_TRIANGULAR:
1517     aA->ignore_ltriangular = flg;
1518     break;
1519   case MAT_ERROR_LOWER_TRIANGULAR:
1520     aA->ignore_ltriangular = flg;
1521     break;
1522   case MAT_GETROW_UPPERTRIANGULAR:
1523     aA->getrow_utriangular = flg;
1524     break;
1525   default:
1526     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1527   }
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1533 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1534 {
1535   PetscErrorCode ierr;
1536 
1537   PetscFunctionBegin;
1538   if (MAT_INITIAL_MATRIX || *B != A) {
1539     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1540   }
1541   PetscFunctionReturn(0);
1542 }
1543 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
/*
   mat = diag(ll) * mat * diag(rr).  The symmetric format requires ll == rr
   (element-wise), so the diagonal block is scaled on both sides with ll and
   the off-diagonal block is scaled on the left with ll and on the right with
   the scattered ghost values of rr.
*/
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  /* different Vec objects are allowed only if their entries are equal */
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  /* start fetching the ghost values of rr; overlap with the local scalings */
  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1580 
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1583 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1584 {
1585   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1586   PetscErrorCode ierr;
1587 
1588   PetscFunctionBegin;
1589   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1594 
1595 #undef __FUNCT__
1596 #define __FUNCT__ "MatEqual_MPISBAIJ"
1597 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1598 {
1599   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1600   Mat            a,b,c,d;
1601   PetscBool      flg;
1602   PetscErrorCode ierr;
1603 
1604   PetscFunctionBegin;
1605   a = matA->A; b = matA->B;
1606   c = matB->A; d = matB->B;
1607 
1608   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1609   if (flg) {
1610     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1611   }
1612   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatCopy_MPISBAIJ"
1618 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1619 {
1620   PetscErrorCode ierr;
1621   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1622   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1623 
1624   PetscFunctionBegin;
1625   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1626   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1627     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1628     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1629     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1630   } else {
1631     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1632     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1633   }
1634   PetscFunctionReturn(0);
1635 }
1636 
#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPISBAIJ"
/* Default MatSetUp(): run preallocation with PETSC_DEFAULT nonzero counts
   so the matrix becomes usable even when the caller never invoked a
   preallocation routine explicitly. */
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1647 
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
/*
   Computes Y = a*X + Y.  Three strategies depending on the pattern relation:
   - SAME_NONZERO_PATTERN: the stored value arrays line up one-to-one, so the
     addition is a raw BLAS axpy over the diagonal (SBAIJ) and off-diagonal
     (BAIJ) value arrays.
   - SUBSET_NONZERO_PATTERN: generic element-wise kernel, with MatGetRow()
     temporarily allowed on the stored upper triangle of X.
   - otherwise: build a new matrix B preallocated for the union of both
     patterns, add into it, and swap it into Y via MatHeaderReplace().
*/
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    /* values changed behind the Mat interface: bump the state manually */
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    /* preallocate B for the union of the diagonal/off-diagonal patterns */
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* B's innards replace Y's; B itself is consumed by this call */
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1699 
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1702 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1703 {
1704   PetscErrorCode ierr;
1705   PetscInt       i;
1706   PetscBool      flg,sorted;
1707 
1708   PetscFunctionBegin;
1709   for (i = 0; i < n; i++) {
1710     ierr = ISSorted(irow[i],&sorted);CHKERRQ(ierr);
1711     if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row index set %d not sorted",i);
1712     ierr = ISSorted(icol[i],&sorted);CHKERRQ(ierr);
1713     if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Column index set %d not sorted",i);
1714   }
1715   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1716   for (i=0; i<n; i++) {
1717     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1718     if (!flg) { /* *B[i] is non-symmetric, set flag */
1719       ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1720     }
1721   }
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatShift_MPISBAIJ"
1727 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1728 {
1729   PetscErrorCode ierr;
1730   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1731   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data,*bij = (Mat_SeqSBAIJ*)maij->B->data;
1732 
1733   PetscFunctionBegin;
1734   if (!aij->nz && !bij->nz) {
1735     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1736   }
1737   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 /* -------------------------------------------------------------------*/
/* Virtual function table for MATMPISBAIJ.  The /*NN*-style comments mark the
   slot index in struct _MatOps; a 0 entry means the operation is not
   supported (or is filled in elsewhere, e.g. destroy/view in
   MatCreate_MPISBAIJ).  The order of entries must match struct _MatOps. */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /*  4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                               /* 10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatGetSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ 0,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       0,
                                       0,
                               /* 49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 54*/ 0,
                                       0,
                                       MatSetUnfactored_MPISBAIJ,
                                       0,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatGetSubMatrix_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*104*/ 0,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
};
1888 
1889 #undef __FUNCT__
1890 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1891 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1892 {
1893   PetscFunctionBegin;
1894   *a = ((Mat_MPISBAIJ*)A->data)->A;
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1900 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1901 {
1902   Mat_MPISBAIJ   *b;
1903   PetscErrorCode ierr;
1904   PetscInt       i,mbs,Mbs;
1905 
1906   PetscFunctionBegin;
1907   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1908   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1909   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1910   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1911 
1912   b   = (Mat_MPISBAIJ*)B->data;
1913   mbs = B->rmap->n/bs;
1914   Mbs = B->rmap->N/bs;
1915   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1916 
1917   B->rmap->bs = bs;
1918   b->bs2      = bs*bs;
1919   b->mbs      = mbs;
1920   b->Mbs      = Mbs;
1921   b->nbs      = B->cmap->n/bs;
1922   b->Nbs      = B->cmap->N/bs;
1923 
1924   for (i=0; i<=b->size; i++) {
1925     b->rangebs[i] = B->rmap->range[i]/bs;
1926   }
1927   b->rstartbs = B->rmap->rstart/bs;
1928   b->rendbs   = B->rmap->rend/bs;
1929 
1930   b->cstartbs = B->cmap->rstart/bs;
1931   b->cendbs   = B->cmap->rend/bs;
1932 
1933   if (!B->preallocated) {
1934     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1935     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1936     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1937     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1938     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1939     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1940     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1941     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1942     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1943   }
1944 
1945   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1946   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1947 
1948   B->preallocated = PETSC_TRUE;
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1954 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1955 {
1956   PetscInt       m,rstart,cstart,cend;
1957   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1958   const PetscInt *JJ    =0;
1959   PetscScalar    *values=0;
1960   PetscErrorCode ierr;
1961 
1962   PetscFunctionBegin;
1963   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1964   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1965   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1966   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1967   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1968   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1969   m      = B->rmap->n/bs;
1970   rstart = B->rmap->rstart/bs;
1971   cstart = B->cmap->rstart/bs;
1972   cend   = B->cmap->rend/bs;
1973 
1974   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1975   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1976   for (i=0; i<m; i++) {
1977     nz = ii[i+1] - ii[i];
1978     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1979     nz_max = PetscMax(nz_max,nz);
1980     JJ     = jj + ii[i];
1981     for (j=0; j<nz; j++) {
1982       if (*JJ >= cstart) break;
1983       JJ++;
1984     }
1985     d = 0;
1986     for (; j<nz; j++) {
1987       if (*JJ++ >= cend) break;
1988       d++;
1989     }
1990     d_nnz[i] = d;
1991     o_nnz[i] = nz - d;
1992   }
1993   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1994   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1995 
1996   values = (PetscScalar*)V;
1997   if (!values) {
1998     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1999     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2000   }
2001   for (i=0; i<m; i++) {
2002     PetscInt          row    = i + rstart;
2003     PetscInt          ncols  = ii[i+1] - ii[i];
2004     const PetscInt    *icols = jj + ii[i];
2005     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2006     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2007   }
2008 
2009   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2010   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2011   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2012   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 /*MC
2017    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2018    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2019    the matrix is stored.
2020 
2021   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2022   can call MatSetOption(Mat, MAT_HERMITIAN);
2023 
2024    Options Database Keys:
2025 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2026 
2027   Level: beginner
2028 
2029 .seealso: MatCreateMPISBAIJ
2030 M*/
2031 
2032 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2033 
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
/*
   Constructor for MATMPISBAIJ: installs the function table, initializes all
   per-object state, registers the composed methods, and marks the matrix
   symmetric.  Preallocation (and creation of the A/B blocks) happens later
   in MatMPISBAIJSetPreallocation_MPISBAIJ.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;

  /* register the methods other code reaches via PetscTryMethod/PetscUseMethod */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
#endif

  /* SBAIJ matrices are symmetric by construction */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    /* NOTE(review): this reuses the option name "-mat_use_hash_table" to read
       a real-valued factor after the bool above — confirm this is intended */
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2129 
2130 /*MC
2131    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2132 
2133    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2134    and MATMPISBAIJ otherwise.
2135 
2136    Options Database Keys:
2137 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2138 
2139   Level: beginner
2140 
2141 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2142 M*/
2143 
2144 #undef __FUNCT__
2145 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
2146 /*@C
2147    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2148    the user should preallocate the matrix storage by setting the parameters
2149    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2150    performance can be increased by more than a factor of 50.
2151 
2152    Collective on Mat
2153 
2154    Input Parameters:
2155 +  B - the matrix
2156 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2157           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2158 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2159            submatrix  (same for all local rows)
2160 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
2162            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2163            for the diagonal entry and set a value even if it is zero.
2164 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2165            submatrix (same for all local rows).
2166 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2167            off-diagonal portion of the local submatrix that is right of the diagonal
2168            (possibly different for each block row) or NULL.
2169 
2170 
2171    Options Database Keys:
2172 .   -mat_no_unroll - uses code that does not unroll the loops in the
2173                      block calculations (much slower)
2174 .   -mat_block_size - size of the blocks to use
2175 
2176    Notes:
2177 
2178    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2179    than it must be used on all processors that share the object for that argument.
2180 
2181    If the *_nnz parameter is given then the *_nz parameter is ignored
2182 
2183    Storage Information:
2184    For a square global matrix we define each processor's diagonal portion
2185    to be its local rows and the corresponding columns (a square submatrix);
2186    each processor's off-diagonal portion encompasses the remainder of the
2187    local matrix (a rectangular submatrix).
2188 
2189    The user can specify preallocated storage for the diagonal part of
2190    the local submatrix with either d_nz or d_nnz (not both).  Set
2191    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2192    memory allocation.  Likewise, specify preallocated storage for the
2193    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2194 
2195    You can call MatGetInfo() to get information on how effective the preallocation was;
2196    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2197    You can also run with the option -info and look for messages with the string
2198    malloc in them to see if additional memory allocation was needed.
2199 
2200    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2201    the figure below we depict these three local rows and all columns (0-11).
2202 
2203 .vb
2204            0 1 2 3 4 5 6 7 8 9 10 11
2205           --------------------------
2206    row 3  |. . . d d d o o o o  o  o
2207    row 4  |. . . d d d o o o o  o  o
2208    row 5  |. . . d d d o o o o  o  o
2209           --------------------------
2210 .ve
2211 
2212    Thus, any entries in the d locations are stored in the d (diagonal)
2213    submatrix, and any entries in the o locations are stored in the
2214    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2215    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2216 
2217    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2218    plus the diagonal part of the d matrix,
2219    and o_nz should indicate the number of block nonzeros per row in the o matrix
2220 
2221    In general, for PDE problems in which most nonzeros are near the diagonal,
2222    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2223    or you will get TERRIBLE performance; see the users' manual chapter on
2224    matrices.
2225 
2226    Level: intermediate
2227 
2228 .keywords: matrix, block, aij, compressed row, sparse, parallel
2229 
2230 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2231 @*/
PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* Dispatch to the type-specific implementation composed on B, if any;
     PetscTryMethod is a no-op for matrix types without one. */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2243 
2244 #undef __FUNCT__
2245 #define __FUNCT__ "MatCreateSBAIJ"
2246 /*@C
2247    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2248    (block compressed row).  For good matrix assembly performance
2249    the user should preallocate the matrix storage by setting the parameters
2250    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2251    performance can be increased by more than a factor of 50.
2252 
2253    Collective on MPI_Comm
2254 
2255    Input Parameters:
2256 +  comm - MPI communicator
2257 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2258           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2259 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2260            This value should be the same as the local size used in creating the
2261            y vector for the matrix-vector product y = Ax.
2262 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2263            This value should be the same as the local size used in creating the
2264            x vector for the matrix-vector product y = Ax.
2265 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2266 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2267 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2268            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the upper triangular
           portion of the various block rows of the diagonal part of the local matrix
           (possibly different for each block row) or NULL.
2272            If you plan to factor the matrix you must leave room for the diagonal entry and
2273            set its value even if it is zero.
2274 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2275            submatrix (same for all local rows).
2276 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2277            off-diagonal portion of the local submatrix (possibly different for
2278            each block row) or NULL.
2279 
2280    Output Parameter:
2281 .  A - the matrix
2282 
2283    Options Database Keys:
2284 .   -mat_no_unroll - uses code that does not unroll the loops in the
2285                      block calculations (much slower)
2286 .   -mat_block_size - size of the blocks to use
2287 .   -mat_mpi - use the parallel matrix data structures even on one processor
2288                (defaults to using SeqBAIJ format on one processor)
2289 
2290    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2292    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2293 
2294    Notes:
2295    The number of rows and columns must be divisible by blocksize.
2296    This matrix type does not support complex Hermitian operation.
2297 
2298    The user MUST specify either the local or global matrix dimensions
2299    (possibly both).
2300 
2301    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2303 
2304    If the *_nnz parameter is given then the *_nz parameter is ignored
2305 
2306    Storage Information:
2307    For a square global matrix we define each processor's diagonal portion
2308    to be its local rows and the corresponding columns (a square submatrix);
2309    each processor's off-diagonal portion encompasses the remainder of the
2310    local matrix (a rectangular submatrix).
2311 
2312    The user can specify preallocated storage for the diagonal part of
2313    the local submatrix with either d_nz or d_nnz (not both).  Set
2314    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2315    memory allocation.  Likewise, specify preallocated storage for the
2316    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2317 
2318    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2319    the figure below we depict these three local rows and all columns (0-11).
2320 
2321 .vb
2322            0 1 2 3 4 5 6 7 8 9 10 11
2323           --------------------------
2324    row 3  |. . . d d d o o o o  o  o
2325    row 4  |. . . d d d o o o o  o  o
2326    row 5  |. . . d d d o o o o  o  o
2327           --------------------------
2328 .ve
2329 
2330    Thus, any entries in the d locations are stored in the d (diagonal)
2331    submatrix, and any entries in the o locations are stored in the
2332    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2333    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2334 
2335    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2336    plus the diagonal part of the d matrix,
2337    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2338    In general, for PDE problems in which most nonzeros are near the diagonal,
2339    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2340    or you will get TERRIBLE performance; see the users' manual chapter on
2341    matrices.
2342 
2343    Level: intermediate
2344 
2345 .keywords: matrix, block, aij, compressed row, sparse, parallel
2346 
2347 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2348 @*/
2349 
2350 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2351 {
2352   PetscErrorCode ierr;
2353   PetscMPIInt    size;
2354 
2355   PetscFunctionBegin;
2356   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2357   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2358   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2359   if (size > 1) {
2360     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2361     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2362   } else {
2363     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2364     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2365   }
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
/*
   MatDuplicate_MPISBAIJ - Creates a new MPISBAIJ matrix with the same layout as
   matin, copying numerical values according to cpvalues.  The new matrix is
   returned fully assembled and preallocated.
*/
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  /* create the shell of the duplicate with identical sizes and type */
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the entire function table so any overridden operations carry over */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  /* share (reference) the row/column layouts rather than rebuilding them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* duplicate is immediately usable: mark it preallocated and assembled */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy scalar bookkeeping fields (block counts per dimension) */
  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;


  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call work space (MatGetRow etc.) is not copied; it is rebuilt on demand */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the flag/factor settings but leave the tables empty
     so they are rebuilt lazily on the duplicate */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  /* deep-copy the column map, in whichever representation was configured */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* deep-copy garray; its length is the number of block columns of the off-diagonal part B */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  /* work vectors for the symmetric multiply; slvec1a/slvec1b/slvec0b below are
     views into the same storage as slvec1/slvec0 */
  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  /* slvec1a aliases the first bs*mbs entries of slvec1, slvec1b the remaining tail */
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  /* slvec0b aliases the tail of slvec0 */
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  /* both matrices share the scatter via an extra reference instead of a deep copy */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  /* duplicate the sequential diagonal (A) and off-diagonal (B) blocks, honoring cpvalues */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatLoad_MPISBAIJ"
2483 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2484 {
2485   PetscErrorCode ierr;
2486   PetscInt       i,nz,j,rstart,rend;
2487   PetscScalar    *vals,*buf;
2488   MPI_Comm       comm;
2489   MPI_Status     status;
2490   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2491   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2492   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2493   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2494   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2495   PetscInt       dcount,kmax,k,nzcount,tmp;
2496   int            fd;
2497 
2498   PetscFunctionBegin;
2499   /* force binary viewer to load .info file if it has not yet done so */
2500   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2501   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2502   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2503   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2504   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2505   if (bs < 0) bs = 1;
2506 
2507   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2508   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2509   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2510   if (!rank) {
2511     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2512     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2513     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2514   }
2515 
2516   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2517   M    = header[1];
2518   N    = header[2];
2519 
2520   /* If global sizes are set, check if they are consistent with that given in the file */
2521   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2522   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2523 
2524   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2525 
2526   /*
2527      This code adds extra rows to make sure the number of rows is
2528      divisible by the blocksize
2529   */
2530   Mbs        = M/bs;
2531   extra_rows = bs - M + bs*(Mbs);
2532   if (extra_rows == bs) extra_rows = 0;
2533   else                  Mbs++;
2534   if (extra_rows &&!rank) {
2535     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2536   }
2537 
2538   /* determine ownership of all rows */
2539   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2540     mbs = Mbs/size + ((Mbs % size) > rank);
2541     m   = mbs*bs;
2542   } else { /* User Set */
2543     m   = newmat->rmap->n;
2544     mbs = m/bs;
2545   }
2546   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2547   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2548   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2549   rowners[0] = 0;
2550   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2551   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2552   rstart = rowners[rank];
2553   rend   = rowners[rank+1];
2554 
2555   /* distribute row lengths to all processors */
2556   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2557   if (!rank) {
2558     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2559     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2560     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2561     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2562     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2563     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2564     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2565   } else {
2566     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2567   }
2568 
2569   if (!rank) {   /* procs[0] */
2570     /* calculate the number of nonzeros on each processor */
2571     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2572     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2573     for (i=0; i<size; i++) {
2574       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2575         procsnz[i] += rowlengths[j];
2576       }
2577     }
2578     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2579 
2580     /* determine max buffer needed and allocate it */
2581     maxnz = 0;
2582     for (i=0; i<size; i++) {
2583       maxnz = PetscMax(maxnz,procsnz[i]);
2584     }
2585     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2586 
2587     /* read in my part of the matrix column indices  */
2588     nz     = procsnz[0];
2589     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2590     mycols = ibuf;
2591     if (size == 1) nz -= extra_rows;
2592     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2593     if (size == 1) {
2594       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2595     }
2596 
2597     /* read in every ones (except the last) and ship off */
2598     for (i=1; i<size-1; i++) {
2599       nz   = procsnz[i];
2600       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2601       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2602     }
2603     /* read in the stuff for the last proc */
2604     if (size != 1) {
2605       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2606       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2607       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2608       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2609     }
2610     ierr = PetscFree(cols);CHKERRQ(ierr);
2611   } else {  /* procs[i], i>0 */
2612     /* determine buffer space needed for message */
2613     nz = 0;
2614     for (i=0; i<m; i++) nz += locrowlens[i];
2615     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2616     mycols = ibuf;
2617     /* receive message of column indices*/
2618     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2619     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2620     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2621   }
2622 
2623   /* loop over local rows, determining number of off diagonal entries */
2624   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2625   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2626   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2627   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2628   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2629   rowcount = 0;
2630   nzcount  = 0;
2631   for (i=0; i<mbs; i++) {
2632     dcount  = 0;
2633     odcount = 0;
2634     for (j=0; j<bs; j++) {
2635       kmax = locrowlens[rowcount];
2636       for (k=0; k<kmax; k++) {
2637         tmp = mycols[nzcount++]/bs; /* block col. index */
2638         if (!mask[tmp]) {
2639           mask[tmp] = 1;
2640           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2641           else masked1[dcount++] = tmp; /* entry in diag portion */
2642         }
2643       }
2644       rowcount++;
2645     }
2646 
2647     dlens[i]  = dcount;  /* d_nzz[i] */
2648     odlens[i] = odcount; /* o_nzz[i] */
2649 
2650     /* zero out the mask elements we set */
2651     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2652     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2653   }
2654   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2655   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2656   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2657 
2658   if (!rank) {
2659     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2660     /* read in my part of the matrix numerical values  */
2661     nz     = procsnz[0];
2662     vals   = buf;
2663     mycols = ibuf;
2664     if (size == 1) nz -= extra_rows;
2665     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2666     if (size == 1) {
2667       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2668     }
2669 
2670     /* insert into matrix */
2671     jj = rstart*bs;
2672     for (i=0; i<m; i++) {
2673       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2674       mycols += locrowlens[i];
2675       vals   += locrowlens[i];
2676       jj++;
2677     }
2678 
2679     /* read in other processors (except the last one) and ship out */
2680     for (i=1; i<size-1; i++) {
2681       nz   = procsnz[i];
2682       vals = buf;
2683       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2684       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2685     }
2686     /* the last proc */
2687     if (size != 1) {
2688       nz   = procsnz[i] - extra_rows;
2689       vals = buf;
2690       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2691       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2692       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2693     }
2694     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2695 
2696   } else {
2697     /* receive numeric values */
2698     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2699 
2700     /* receive message of values*/
2701     vals   = buf;
2702     mycols = ibuf;
2703     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2704     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2705     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2706 
2707     /* insert into matrix */
2708     jj = rstart*bs;
2709     for (i=0; i<m; i++) {
2710       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2711       mycols += locrowlens[i];
2712       vals   += locrowlens[i];
2713       jj++;
2714     }
2715   }
2716 
2717   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2718   ierr = PetscFree(buf);CHKERRQ(ierr);
2719   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2720   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2721   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2722   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2723   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2724   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2725   PetscFunctionReturn(0);
2726 }
2727 
2728 #undef __FUNCT__
2729 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2730 /*XXXXX@
2731    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2732 
2733    Input Parameters:
2734 .  mat  - the matrix
2735 .  fact - factor
2736 
2737    Not Collective on Mat, each process can have a different hash factor
2738 
2739    Level: advanced
2740 
2741   Notes:
2742    This can also be set by the command line option: -mat_use_hash_table fact
2743 
2744 .keywords: matrix, hashtable, factor, HT
2745 
2746 .seealso: MatSetOption()
2747 @XXXXX*/
2748 
2749 
2750 #undef __FUNCT__
2751 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
/*
   MatGetRowMaxAbs_MPISBAIJ - Computes v[i] = max_j |A(i,j)| for each row i.

   Since only the upper triangle is stored, an entry of the off-diagonal part B
   contributes to the maximum of both its row (owned locally) and its column
   (owned by a later process); the column contributions are accumulated in
   'work' and sent to their owners.  The idx output is not supported.
*/
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* start from the row maxima of the diagonal block A */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;      /* values of the off-diagonal block B */
  bi  = b->i;      /* row pointers of B */
  bj  = b->j;      /* block-column indices of B */

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed: one slot per global point row */
  ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  /* the last rank has an empty B (upper triangle ends there), so it is skipped */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;                /* local col index */
          /* NOTE(review): shifts by the end of this rank's range; this assumes
             B's block columns are numbered consecutively starting just past the
             local range -- confirm against how garray is built for SBAIJ */
          col += rowners_bs[rank+1];      /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;   /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;   /* column contribution for the owner */
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values */
  /* every rank but 0 receives one message from each lower rank and merges it */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2834 
2835 #undef __FUNCT__
2836 #define __FUNCT__ "MatSOR_MPISBAIJ"
/*
   MatSOR_MPISBAIJ - SOR/SSOR relaxation for MPISBAIJ matrices (bs == 1 only).

   Supported flags: SOR_APPLY_UPPER, SOR_LOCAL_SYMMETRIC_SWEEP,
   SOR_LOCAL_FORWARD_SWEEP / SOR_LOCAL_BACKWARD_SWEEP (with its==1 and zero
   initial guess), and SOR_EISENSTAT.  Anything else errors out.
*/
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    /* delegate directly to the local diagonal block */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration with x = 0 needs no off-process contribution */
      /* NOTE(review): passes lits for both the its and lits arguments of the
         local SOR -- confirm this is intended rather than (lits,1) */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      /* NOTE(review): memcpy size uses sizeof(MatScalar) for PetscScalar data --
         verify the two types match in this configuration */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      /* slvec1 <- bb - B^T*xx contributions gathered from all processes */
      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* lazily create the work vectors kept on the matrix */
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      /* NOTE(review): the diagonal-block product is written to bb1 but the
         AYPX below updates slvec1a -- verify these targets are consistent */
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        /* scale == -1 in this case: sl = b - D*x */
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
2968 
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
/*
   MatSOR_MPISBAIJ_2comm - SSOR relaxation for a parallel symmetric block matrix,
   performing two communications per iteration: a forward scatter of xx into the
   ghost vector and a reverse scatter accumulating the lower-triangular product.

   Only SOR_LOCAL_SYMMETRIC_SWEEP is supported, and only for block size 1
   (both enforced below).
*/
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* With a zero initial guess the first sweep needs no off-process data,
         so it is done purely on the local diagonal block; it consumes one
         global iteration. */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* start forward scatter of xx into the ghost vector; overlapped with the
         local B^T*xx product below */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx (lvec1 = -B^T*xx is scattered
         in reverse and ADDed into bb1 after the copy below) */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x (lvec is negated so multadd subtracts) */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep: local symmetric SOR with the updated right-hand side */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
3014 
3015 #undef __FUNCT__
3016 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
3017 /*@
3018      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
3019          CSR format the local rows.
3020 
3021    Collective on MPI_Comm
3022 
3023    Input Parameters:
3024 +  comm - MPI communicator
3025 .  bs - the block size, only a block size of 1 is supported
3026 .  m - number of local rows (Cannot be PETSC_DECIDE)
3027 .  n - This value should be the same as the local size used in creating the
3028        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3029        calculated if N is given) For square matrices n is almost always m.
3030 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3031 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3032 .   i - row indices
3033 .   j - column indices
3034 -   a - matrix values
3035 
3036    Output Parameter:
3037 .   mat - the matrix
3038 
3039    Level: intermediate
3040 
3041    Notes:
3042        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3043      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3044      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3045 
3046        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3047 
3048 .keywords: matrix, aij, compressed row, sparse, parallel
3049 
3050 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3051           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3052 @*/
3053 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3054 {
3055   PetscErrorCode ierr;
3056 
3057 
3058   PetscFunctionBegin;
3059   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3060   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3061   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3062   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3063   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3064   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3065   PetscFunctionReturn(0);
3066 }
3067 
3068 
3069 #undef __FUNCT__
3070 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
3071 /*@C
3072    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
3073    (the default parallel PETSc format).
3074 
3075    Collective on MPI_Comm
3076 
3077    Input Parameters:
3078 +  B - the matrix
3079 .  bs - the block size
3080 .  i - the indices into j for the start of each local row (starts with zero)
3081 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3082 -  v - optional values in the matrix
3083 
3084    Level: developer
3085 
3086 .keywords: matrix, aij, compressed row, sparse, parallel
3087 
3088 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3089 @*/
3090 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3091 {
3092   PetscErrorCode ierr;
3093 
3094   PetscFunctionBegin;
3095   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3096   PetscFunctionReturn(0);
3097 }
3098 
3099 #undef __FUNCT__
3100 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
3101 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3102 {
3103   PetscErrorCode ierr;
3104   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3105   PetscInt       *indx;
3106   PetscScalar    *values;
3107 
3108   PetscFunctionBegin;
3109   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3110   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3111     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3112     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3113     PetscInt       *bindx,rmax=a->rmax,j;
3114 
3115     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3116     mbs = m/bs; Nbs = N/cbs;
3117     if (n == PETSC_DECIDE) {
3118       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3119     }
3120     /* Check sum(n) = Nbs */
3121     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3122     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
3123 
3124     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3125     rstart -= mbs;
3126 
3127     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3128     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3129     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3130     for (i=0; i<mbs; i++) {
3131       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3132       nnz = nnz/bs;
3133       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3134       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3135       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3136     }
3137     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3138     ierr = PetscFree(bindx);CHKERRQ(ierr);
3139 
3140     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3141     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3142     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3143     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3144     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3145     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3146   }
3147 
3148   /* numeric phase */
3149   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3150   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3151 
3152   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3153   for (i=0; i<m; i++) {
3154     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3155     Ii   = i + rstart;
3156     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3157     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3158   }
3159   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3160   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3161   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3162   PetscFunctionReturn(0);
3163 }
3164