xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision b2566f29c2b6470df769aa9f7deb9e2726b0959e)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #undef __FUNCT__
11 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
12 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
19   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
20   PetscFunctionReturn(0);
21 }
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
25 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
32   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
36 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
37   { \
38  \
39     brow = row/bs;  \
40     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
41     rmax = aimax[brow]; nrow = ailen[brow]; \
42     bcol = col/bs; \
43     ridx = row % bs; cidx = col % bs; \
44     low  = 0; high = nrow; \
45     while (high-low > 3) { \
46       t = (low+high)/2; \
47       if (rp[t] > bcol) high = t; \
48       else              low  = t; \
49     } \
50     for (_i=low; _i<high; _i++) { \
51       if (rp[_i] > bcol) break; \
52       if (rp[_i] == bcol) { \
53         bap = ap + bs2*_i + bs*cidx + ridx; \
54         if (addv == ADD_VALUES) *bap += value;  \
55         else                    *bap  = value;  \
56         goto a_noinsert; \
57       } \
58     } \
59     if (a->nonew == 1) goto a_noinsert; \
60     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
61     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
62     N = nrow++ - 1;  \
63     /* shift up all the later entries in this row */ \
64     for (ii=N; ii>=_i; ii--) { \
65       rp[ii+1] = rp[ii]; \
66       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
67     } \
68     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
69     rp[_i]                      = bcol;  \
70     ap[bs2*_i + bs*cidx + ridx] = value;  \
71     A->nonzerostate++;\
72 a_noinsert:; \
73     ailen[brow] = nrow; \
74   }
75 
76 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
77   { \
78     brow = row/bs;  \
79     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
80     rmax = bimax[brow]; nrow = bilen[brow]; \
81     bcol = col/bs; \
82     ridx = row % bs; cidx = col % bs; \
83     low  = 0; high = nrow; \
84     while (high-low > 3) { \
85       t = (low+high)/2; \
86       if (rp[t] > bcol) high = t; \
87       else              low  = t; \
88     } \
89     for (_i=low; _i<high; _i++) { \
90       if (rp[_i] > bcol) break; \
91       if (rp[_i] == bcol) { \
92         bap = ap + bs2*_i + bs*cidx + ridx; \
93         if (addv == ADD_VALUES) *bap += value;  \
94         else                    *bap  = value;  \
95         goto b_noinsert; \
96       } \
97     } \
98     if (b->nonew == 1) goto b_noinsert; \
99     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
100     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
101     N = nrow++ - 1;  \
102     /* shift up all the later entries in this row */ \
103     for (ii=N; ii>=_i; ii--) { \
104       rp[ii+1] = rp[ii]; \
105       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
106     } \
107     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
108     rp[_i]                      = bcol;  \
109     ap[bs2*_i + bs*cidx + ridx] = value;  \
110     B->nonzerostate++;\
111 b_noinsert:; \
112     bilen[brow] = nrow; \
113   }
114 
115 /* Only add/insert a(i,j) with i<=j (blocks).
116    Any a(i,j) with i>j input by user is ingored.
117 */
118 #undef __FUNCT__
119 #define __FUNCT__ "MatSetValues_MPISBAIJ"
120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
121 {
122   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
123   MatScalar      value;
124   PetscBool      roworiented = baij->roworiented;
125   PetscErrorCode ierr;
126   PetscInt       i,j,row,col;
127   PetscInt       rstart_orig=mat->rmap->rstart;
128   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
129   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
130 
131   /* Some Variables required in the macro */
132   Mat          A     = baij->A;
133   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
134   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
135   MatScalar    *aa   =a->a;
136 
137   Mat         B     = baij->B;
138   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
139   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
140   MatScalar   *ba   =b->a;
141 
142   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
143   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
144   MatScalar *ap,*bap;
145 
146   /* for stash */
147   PetscInt  n_loc, *in_loc = NULL;
148   MatScalar *v_loc = NULL;
149 
150   PetscFunctionBegin;
151   if (!baij->donotstash) {
152     if (n > baij->n_loc) {
153       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
154       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
155       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
156       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
157 
158       baij->n_loc = n;
159     }
160     in_loc = baij->in_loc;
161     v_loc  = baij->v_loc;
162   }
163 
164   for (i=0; i<m; i++) {
165     if (im[i] < 0) continue;
166 #if defined(PETSC_USE_DEBUG)
167     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
168 #endif
169     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
170       row = im[i] - rstart_orig;              /* local row index */
171       for (j=0; j<n; j++) {
172         if (im[i]/bs > in[j]/bs) {
173           if (a->ignore_ltriangular) {
174             continue;    /* ignore lower triangular blocks */
175           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
176         }
177         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
178           col  = in[j] - cstart_orig;         /* local col index */
179           brow = row/bs; bcol = col/bs;
180           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
181           if (roworiented) value = v[i*n+j];
182           else             value = v[i+j*m];
183           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
184           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
185         } else if (in[j] < 0) continue;
186 #if defined(PETSC_USE_DEBUG)
187         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
188 #endif
189         else {  /* off-diag entry (B) */
190           if (mat->was_assembled) {
191             if (!baij->colmap) {
192               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
193             }
194 #if defined(PETSC_USE_CTABLE)
195             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
196             col  = col - 1;
197 #else
198             col = baij->colmap[in[j]/bs] - 1;
199 #endif
200             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
201               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
202               col  =  in[j];
203               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
204               B    = baij->B;
205               b    = (Mat_SeqBAIJ*)(B)->data;
206               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
207               ba   = b->a;
208             } else col += in[j]%bs;
209           } else col = in[j];
210           if (roworiented) value = v[i*n+j];
211           else             value = v[i+j*m];
212           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
213           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
214         }
215       }
216     } else {  /* off processor entry */
217       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
218       if (!baij->donotstash) {
219         mat->assembled = PETSC_FALSE;
220         n_loc          = 0;
221         for (j=0; j<n; j++) {
222           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
223           in_loc[n_loc] = in[j];
224           if (roworiented) {
225             v_loc[n_loc] = v[i*n+j];
226           } else {
227             v_loc[n_loc] = v[j*m+i];
228           }
229           n_loc++;
230         }
231         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
241 {
242   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
243   PetscErrorCode    ierr;
244   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
245   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
246   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
247   PetscBool         roworiented=a->roworiented;
248   const PetscScalar *value     = v;
249   MatScalar         *ap,*aa = a->a,*bap;
250 
251   PetscFunctionBegin;
252   if (col < row) {
253     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
254     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
255   }
256   rp   = aj + ai[row];
257   ap   = aa + bs2*ai[row];
258   rmax = imax[row];
259   nrow = ailen[row];
260   value = v;
261   low   = 0;
262   high  = nrow;
263 
264   while (high-low > 7) {
265     t = (low+high)/2;
266     if (rp[t] > col) high = t;
267     else             low  = t;
268   }
269   for (i=low; i<high; i++) {
270     if (rp[i] > col) break;
271     if (rp[i] == col) {
272       bap = ap +  bs2*i;
273       if (roworiented) {
274         if (is == ADD_VALUES) {
275           for (ii=0; ii<bs; ii++) {
276             for (jj=ii; jj<bs2; jj+=bs) {
277               bap[jj] += *value++;
278             }
279           }
280         } else {
281           for (ii=0; ii<bs; ii++) {
282             for (jj=ii; jj<bs2; jj+=bs) {
283               bap[jj] = *value++;
284             }
285           }
286         }
287       } else {
288         if (is == ADD_VALUES) {
289           for (ii=0; ii<bs; ii++) {
290             for (jj=0; jj<bs; jj++) {
291               *bap++ += *value++;
292             }
293           }
294         } else {
295           for (ii=0; ii<bs; ii++) {
296             for (jj=0; jj<bs; jj++) {
297               *bap++  = *value++;
298             }
299           }
300         }
301       }
302       goto noinsert2;
303     }
304   }
305   if (nonew == 1) goto noinsert2;
306   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
307   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
308   N = nrow++ - 1; high++;
309   /* shift up all the later entries in this row */
310   for (ii=N; ii>=i; ii--) {
311     rp[ii+1] = rp[ii];
312     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
313   }
314   if (N >= i) {
315     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
316   }
317   rp[i] = col;
318   bap   = ap +  bs2*i;
319   if (roworiented) {
320     for (ii=0; ii<bs; ii++) {
321       for (jj=ii; jj<bs2; jj+=bs) {
322         bap[jj] = *value++;
323       }
324     }
325   } else {
326     for (ii=0; ii<bs; ii++) {
327       for (jj=0; jj<bs; jj++) {
328         *bap++ = *value++;
329       }
330     }
331   }
332   noinsert2:;
333   ailen[row] = nrow;
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
339 /*
340    This routine is exactly duplicated in mpibaij.c
341 */
342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
343 {
344   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
345   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
346   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
347   PetscErrorCode    ierr;
348   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
349   PetscBool         roworiented=a->roworiented;
350   const PetscScalar *value     = v;
351   MatScalar         *ap,*aa = a->a,*bap;
352 
353   PetscFunctionBegin;
354   rp   = aj + ai[row];
355   ap   = aa + bs2*ai[row];
356   rmax = imax[row];
357   nrow = ailen[row];
358   low  = 0;
359   high = nrow;
360   value = v;
361   while (high-low > 7) {
362     t = (low+high)/2;
363     if (rp[t] > col) high = t;
364     else             low  = t;
365   }
366   for (i=low; i<high; i++) {
367     if (rp[i] > col) break;
368     if (rp[i] == col) {
369       bap = ap +  bs2*i;
370       if (roworiented) {
371         if (is == ADD_VALUES) {
372           for (ii=0; ii<bs; ii++) {
373             for (jj=ii; jj<bs2; jj+=bs) {
374               bap[jj] += *value++;
375             }
376           }
377         } else {
378           for (ii=0; ii<bs; ii++) {
379             for (jj=ii; jj<bs2; jj+=bs) {
380               bap[jj] = *value++;
381             }
382           }
383         }
384       } else {
385         if (is == ADD_VALUES) {
386           for (ii=0; ii<bs; ii++,value+=bs) {
387             for (jj=0; jj<bs; jj++) {
388               bap[jj] += value[jj];
389             }
390             bap += bs;
391           }
392         } else {
393           for (ii=0; ii<bs; ii++,value+=bs) {
394             for (jj=0; jj<bs; jj++) {
395               bap[jj]  = value[jj];
396             }
397             bap += bs;
398           }
399         }
400       }
401       goto noinsert2;
402     }
403   }
404   if (nonew == 1) goto noinsert2;
405   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
406   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
407   N = nrow++ - 1; high++;
408   /* shift up all the later entries in this row */
409   for (ii=N; ii>=i; ii--) {
410     rp[ii+1] = rp[ii];
411     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
412   }
413   if (N >= i) {
414     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
415   }
416   rp[i] = col;
417   bap   = ap +  bs2*i;
418   if (roworiented) {
419     for (ii=0; ii<bs; ii++) {
420       for (jj=ii; jj<bs2; jj+=bs) {
421         bap[jj] = *value++;
422       }
423     }
424   } else {
425     for (ii=0; ii<bs; ii++) {
426       for (jj=0; jj<bs; jj++) {
427         *bap++ = *value++;
428       }
429     }
430   }
431   noinsert2:;
432   ailen[row] = nrow;
433   PetscFunctionReturn(0);
434 }
435 
436 #undef __FUNCT__
437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
438 /*
439     This routine could be optimized by removing the need for the block copy below and passing stride information
440   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
441 */
442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
443 {
444   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
445   const MatScalar *value;
446   MatScalar       *barray     =baij->barray;
447   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
448   PetscErrorCode  ierr;
449   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
450   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
451   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
452 
453   PetscFunctionBegin;
454   if (!barray) {
455     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
456     baij->barray = barray;
457   }
458 
459   if (roworiented) {
460     stepval = (n-1)*bs;
461   } else {
462     stepval = (m-1)*bs;
463   }
464   for (i=0; i<m; i++) {
465     if (im[i] < 0) continue;
466 #if defined(PETSC_USE_DEBUG)
467     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
468 #endif
469     if (im[i] >= rstart && im[i] < rend) {
470       row = im[i] - rstart;
471       for (j=0; j<n; j++) {
472         if (im[i] > in[j]) {
473           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
474           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
475         }
476         /* If NumCol = 1 then a copy is not required */
477         if ((roworiented) && (n == 1)) {
478           barray = (MatScalar*) v + i*bs2;
479         } else if ((!roworiented) && (m == 1)) {
480           barray = (MatScalar*) v + j*bs2;
481         } else { /* Here a copy is required */
482           if (roworiented) {
483             value = v + i*(stepval+bs)*bs + j*bs;
484           } else {
485             value = v + j*(stepval+bs)*bs + i*bs;
486           }
487           for (ii=0; ii<bs; ii++,value+=stepval) {
488             for (jj=0; jj<bs; jj++) {
489               *barray++ = *value++;
490             }
491           }
492           barray -=bs2;
493         }
494 
495         if (in[j] >= cstart && in[j] < cend) {
496           col  = in[j] - cstart;
497           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
498         } else if (in[j] < 0) continue;
499 #if defined(PETSC_USE_DEBUG)
500         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
501 #endif
502         else {
503           if (mat->was_assembled) {
504             if (!baij->colmap) {
505               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
506             }
507 
508 #if defined(PETSC_USE_DEBUG)
509 #if defined(PETSC_USE_CTABLE)
510             { PetscInt data;
511               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
512               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
513             }
514 #else
515             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
516 #endif
517 #endif
518 #if defined(PETSC_USE_CTABLE)
519             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520             col  = (col - 1)/bs;
521 #else
522             col = (baij->colmap[in[j]] - 1)/bs;
523 #endif
524             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
526               col  = in[j];
527             }
528           } else col = in[j];
529           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
530         }
531       }
532     } else {
533       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
534       if (!baij->donotstash) {
535         if (roworiented) {
536           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
537         } else {
538           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
539         }
540       }
541     }
542   }
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_MPISBAIJ"
548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
549 {
550   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
551   PetscErrorCode ierr;
552   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
553   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
554 
555   PetscFunctionBegin;
556   for (i=0; i<m; i++) {
557     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
558     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
559     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
560       row = idxm[i] - bsrstart;
561       for (j=0; j<n; j++) {
562         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
563         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
564         if (idxn[j] >= bscstart && idxn[j] < bscend) {
565           col  = idxn[j] - bscstart;
566           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567         } else {
568           if (!baij->colmap) {
569             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
570           }
571 #if defined(PETSC_USE_CTABLE)
572           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
573           data--;
574 #else
575           data = baij->colmap[idxn[j]/bs]-1;
576 #endif
577           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
578           else {
579             col  = data + idxn[j]%bs;
580             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581           }
582         }
583       }
584     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
585   }
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatNorm_MPISBAIJ"
591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
592 {
593   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
594   PetscErrorCode ierr;
595   PetscReal      sum[2],*lnorm2;
596 
597   PetscFunctionBegin;
598   if (baij->size == 1) {
599     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
600   } else {
601     if (type == NORM_FROBENIUS) {
602       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
603       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
605       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
606       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
607       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
608       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
609       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
610     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
611       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
612       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
613       PetscReal    *rsum,*rsum2,vabs;
614       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
615       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
616       MatScalar    *v;
617 
618       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
619       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
620       /* Amat */
621       v = amat->a; jj = amat->j;
622       for (brow=0; brow<mbs; brow++) {
623         grow = bs*(rstart + brow);
624         nz   = amat->i[brow+1] - amat->i[brow];
625         for (bcol=0; bcol<nz; bcol++) {
626           gcol = bs*(rstart + *jj); jj++;
627           for (col=0; col<bs; col++) {
628             for (row=0; row<bs; row++) {
629               vabs            = PetscAbsScalar(*v); v++;
630               rsum[gcol+col] += vabs;
631               /* non-diagonal block */
632               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
633             }
634           }
635         }
636       }
637       /* Bmat */
638       v = bmat->a; jj = bmat->j;
639       for (brow=0; brow<mbs; brow++) {
640         grow = bs*(rstart + brow);
641         nz = bmat->i[brow+1] - bmat->i[brow];
642         for (bcol=0; bcol<nz; bcol++) {
643           gcol = bs*garray[*jj]; jj++;
644           for (col=0; col<bs; col++) {
645             for (row=0; row<bs; row++) {
646               vabs            = PetscAbsScalar(*v); v++;
647               rsum[gcol+col] += vabs;
648               rsum[grow+row] += vabs;
649             }
650           }
651         }
652       }
653       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
654       *norm = 0.0;
655       for (col=0; col<mat->cmap->N; col++) {
656         if (rsum2[col] > *norm) *norm = rsum2[col];
657       }
658       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
659     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
660   }
661   PetscFunctionReturn(0);
662 }
663 
664 #undef __FUNCT__
665 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
666 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
667 {
668   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
669   PetscErrorCode ierr;
670   PetscInt       nstash,reallocs;
671 
672   PetscFunctionBegin;
673   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
674 
675   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
676   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
677   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
678   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
679   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
680   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 
684 #undef __FUNCT__
685 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
686 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
687 {
688   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
689   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
690   PetscErrorCode ierr;
691   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscMPIInt    n;
695   PetscBool      r1,r2,r3;
696   MatScalar      *val;
697 
698   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
699   PetscFunctionBegin;
700   if (!baij->donotstash &&  !mat->nooffprocentries) {
701     while (1) {
702       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
703       if (!flg) break;
704 
705       for (i=0; i<n;) {
706         /* Now identify the consecutive vals belonging to the same row */
707         for (j=i,rstart=row[j]; j<n; j++) {
708           if (row[j] != rstart) break;
709         }
710         if (j < n) ncols = j-i;
711         else       ncols = n-i;
712         /* Now assemble all these values with a single function call */
713         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
714         i    = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718     /* Now process the block-stash. Since the values are stashed column-oriented,
719        set the roworiented flag to column oriented, and after MatSetValues()
720        restore the original flags */
721     r1 = baij->roworiented;
722     r2 = a->roworiented;
723     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
724 
725     baij->roworiented = PETSC_FALSE;
726     a->roworiented    = PETSC_FALSE;
727 
728     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
729     while (1) {
730       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
731       if (!flg) break;
732 
733       for (i=0; i<n;) {
734         /* Now identify the consecutive vals belonging to the same row */
735         for (j=i,rstart=row[j]; j<n; j++) {
736           if (row[j] != rstart) break;
737         }
738         if (j < n) ncols = j-i;
739         else       ncols = n-i;
740         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
741         i    = j;
742       }
743     }
744     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
745 
746     baij->roworiented = r1;
747     a->roworiented    = r2;
748 
749     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
750   }
751 
752   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
753   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
754 
755   /* determine if any processor has disassembled, if so we must
756      also disassemble ourselfs, in order that we may reassemble. */
757   /*
758      if nonzero structure of submatrix B cannot change then we know that
759      no processor disassembled thus we can skip this stuff
760   */
761   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
762     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
763     if (mat->was_assembled && !other_disassembled) {
764       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
765     }
766   }
767 
768   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
769     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
770   }
771   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
772   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
773 
774   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
775 
776   baij->rowvalues = 0;
777 
778   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
779   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
780     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
781     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
782   }
783   PetscFunctionReturn(0);
784 }
785 
786 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
787 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
788 #include <petscdraw.h>
789 #undef __FUNCT__
790 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
791 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
792 {
793   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
794   PetscErrorCode    ierr;
795   PetscInt          bs   = mat->rmap->bs;
796   PetscMPIInt       rank = baij->rank;
797   PetscBool         iascii,isdraw;
798   PetscViewer       sviewer;
799   PetscViewerFormat format;
800 
801   PetscFunctionBegin;
802   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
803   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
804   if (iascii) {
805     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
806     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
807       MatInfo info;
808       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
809       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
810       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
811       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
812       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
813       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
814       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
815       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
816       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
817       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
818       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
819       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
820       PetscFunctionReturn(0);
821     } else if (format == PETSC_VIEWER_ASCII_INFO) {
822       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
823       PetscFunctionReturn(0);
824     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
825       PetscFunctionReturn(0);
826     }
827   }
828 
829   if (isdraw) {
830     PetscDraw draw;
831     PetscBool isnull;
832     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
833     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
834   }
835 
836   {
837     /* assemble the entire matrix onto first processor. */
838     Mat          A;
839     Mat_SeqSBAIJ *Aloc;
840     Mat_SeqBAIJ  *Bloc;
841     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
842     MatScalar    *a;
843     const char   *matname;
844 
845     /* Should this be the same type as mat? */
846     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
847     if (!rank) {
848       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
849     } else {
850       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
851     }
852     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
853     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
854     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
855     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
856 
857     /* copy over the A part */
858     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
859     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
860     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
861 
862     for (i=0; i<mbs; i++) {
863       rvals[0] = bs*(baij->rstartbs + i);
864       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
865       for (j=ai[i]; j<ai[i+1]; j++) {
866         col = (baij->cstartbs+aj[j])*bs;
867         for (k=0; k<bs; k++) {
868           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
869           col++;
870           a += bs;
871         }
872       }
873     }
874     /* copy over the B part */
875     Bloc = (Mat_SeqBAIJ*)baij->B->data;
876     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
877     for (i=0; i<mbs; i++) {
878 
879       rvals[0] = bs*(baij->rstartbs + i);
880       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
881       for (j=ai[i]; j<ai[i+1]; j++) {
882         col = baij->garray[aj[j]]*bs;
883         for (k=0; k<bs; k++) {
884           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
885           col++;
886           a += bs;
887         }
888       }
889     }
890     ierr = PetscFree(rvals);CHKERRQ(ierr);
891     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
892     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
893     /*
894        Everyone has to call to draw the matrix since the graphics waits are
895        synchronized across all processors that share the PetscDraw object
896     */
897     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
898     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
899     if (!rank) {
900       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
901       ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
902     }
903     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
904     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
905     ierr = MatDestroy(&A);CHKERRQ(ierr);
906   }
907   PetscFunctionReturn(0);
908 }
909 
910 #undef __FUNCT__
911 #define __FUNCT__ "MatView_MPISBAIJ"
912 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
913 {
914   PetscErrorCode ierr;
915   PetscBool      iascii,isdraw,issocket,isbinary;
916 
917   PetscFunctionBegin;
918   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
919   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
920   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
921   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
922   if (iascii || isdraw || issocket || isbinary) {
923     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
924   }
925   PetscFunctionReturn(0);
926 }
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "MatDestroy_MPISBAIJ"
930 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
931 {
932   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
933   PetscErrorCode ierr;
934 
935   PetscFunctionBegin;
936 #if defined(PETSC_USE_LOG)
937   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
938 #endif
939   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
940   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
941   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
942   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
943 #if defined(PETSC_USE_CTABLE)
944   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
945 #else
946   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
947 #endif
948   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
949   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
950   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
951   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
952   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
953   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
954   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
955   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
956   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
957   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
958   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
959   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
960   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
961   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
962   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
963 #if defined(PETSC_USE_REAL_MAT_SINGLE)
964   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
965 #endif
966   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
967   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
968   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
969   ierr = PetscFree(mat->data);CHKERRQ(ierr);
970 
971   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
972   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
973   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
974   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
975   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
976   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
977 #if defined(PETSC_HAVE_ELEMENTAL)
978   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
979 #endif
980   PetscFunctionReturn(0);
981 }
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
985 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
986 {
987   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
988   PetscErrorCode    ierr;
989   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
990   PetscScalar       *from;
991   const PetscScalar *x;
992 
993   PetscFunctionBegin;
994   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
995   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
996 
997   /* diagonal part */
998   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
999   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1000 
1001   /* subdiagonal part */
1002   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1003 
1004   /* copy x into the vec slvec0 */
1005   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1006   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1007 
1008   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1009   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1010   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1011 
1012   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1013   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014   /* supperdiagonal part */
1015   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 #undef __FUNCT__
1020 #define __FUNCT__ "MatMult_MPISBAIJ"
1021 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1022 {
1023   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1024   PetscErrorCode    ierr;
1025   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1026   PetscScalar       *from;
1027   const PetscScalar *x;
1028 
1029   PetscFunctionBegin;
1030   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1031   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1032 
1033   /* diagonal part */
1034   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1035   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1036 
1037   /* subdiagonal part */
1038   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1039 
1040   /* copy x into the vec slvec0 */
1041   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1042   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1043 
1044   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1045   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1046   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1047 
1048   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1049   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1050   /* supperdiagonal part */
1051   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 #undef __FUNCT__
1056 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1057 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1058 {
1059   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1060   PetscErrorCode ierr;
1061   PetscInt       nt;
1062 
1063   PetscFunctionBegin;
1064   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1065   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1066 
1067   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1068   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1069 
1070   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1071   /* do diagonal part */
1072   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1073   /* do supperdiagonal part */
1074   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1075   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1076   /* do subdiagonal part */
1077   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1078   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1079   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1085 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1086 {
1087   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1088   PetscErrorCode    ierr;
1089   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1090   PetscScalar       *from,zero=0.0;
1091   const PetscScalar *x;
1092 
1093   PetscFunctionBegin;
1094   /*
1095   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1096   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1097   */
1098   /* diagonal part */
1099   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1100   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1101 
1102   /* subdiagonal part */
1103   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1104 
1105   /* copy x into the vec slvec0 */
1106   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1107   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1108   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1109   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1110 
1111   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114 
1115   /* supperdiagonal part */
1116   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1122 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1123 {
1124   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1125   PetscErrorCode ierr;
1126 
1127   PetscFunctionBegin;
1128   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1129   /* do diagonal part */
1130   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1131   /* do supperdiagonal part */
1132   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1133   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1134 
1135   /* do subdiagonal part */
1136   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1137   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 /*
1143   This only works correctly for square matrices where the subblock A->A is the
1144    diagonal block
1145 */
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1148 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1149 {
1150   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1151   PetscErrorCode ierr;
1152 
1153   PetscFunctionBegin;
1154   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1155   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #undef __FUNCT__
1160 #define __FUNCT__ "MatScale_MPISBAIJ"
1161 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1162 {
1163   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1164   PetscErrorCode ierr;
1165 
1166   PetscFunctionBegin;
1167   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1168   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1174 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1175 {
1176   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1177   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1178   PetscErrorCode ierr;
1179   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1180   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1181   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1182 
1183   PetscFunctionBegin;
1184   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1185   mat->getrowactive = PETSC_TRUE;
1186 
1187   if (!mat->rowvalues && (idx || v)) {
1188     /*
1189         allocate enough space to hold information from the longest row.
1190     */
1191     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1192     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1193     PetscInt     max = 1,mbs = mat->mbs,tmp;
1194     for (i=0; i<mbs; i++) {
1195       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1196       if (max < tmp) max = tmp;
1197     }
1198     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1199   }
1200 
1201   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1202   lrow = row - brstart;  /* local row index */
1203 
1204   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1205   if (!v)   {pvA = 0; pvB = 0;}
1206   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1207   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1208   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1209   nztot = nzA + nzB;
1210 
1211   cmap = mat->garray;
1212   if (v  || idx) {
1213     if (nztot) {
1214       /* Sort by increasing column numbers, assuming A and B already sorted */
1215       PetscInt imark = -1;
1216       if (v) {
1217         *v = v_p = mat->rowvalues;
1218         for (i=0; i<nzB; i++) {
1219           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1220           else break;
1221         }
1222         imark = i;
1223         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1224         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1225       }
1226       if (idx) {
1227         *idx = idx_p = mat->rowindices;
1228         if (imark > -1) {
1229           for (i=0; i<imark; i++) {
1230             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1231           }
1232         } else {
1233           for (i=0; i<nzB; i++) {
1234             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1235             else break;
1236           }
1237           imark = i;
1238         }
1239         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1240         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1241       }
1242     } else {
1243       if (idx) *idx = 0;
1244       if (v)   *v   = 0;
1245     }
1246   }
1247   *nz  = nztot;
1248   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1249   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1255 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1256 {
1257   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1258 
1259   PetscFunctionBegin;
1260   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1261   baij->getrowactive = PETSC_FALSE;
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1267 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1268 {
1269   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1270   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1271 
1272   PetscFunctionBegin;
1273   aA->getrow_utriangular = PETSC_TRUE;
1274   PetscFunctionReturn(0);
1275 }
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1278 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1279 {
1280   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1281   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1282 
1283   PetscFunctionBegin;
1284   aA->getrow_utriangular = PETSC_FALSE;
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1290 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1291 {
1292   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1293   PetscErrorCode ierr;
1294 
1295   PetscFunctionBegin;
1296   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1297   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 #undef __FUNCT__
1302 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1303 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1304 {
1305   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1306   PetscErrorCode ierr;
1307 
1308   PetscFunctionBegin;
1309   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1310   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
1315    Input: isrow       - distributed(parallel),
1316           iscol_local - locally owned (seq)
1317 */
1318 #undef __FUNCT__
1319 #define __FUNCT__ "ISEqual_private"
1320 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1321 {
1322   PetscErrorCode ierr;
1323   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1324   const PetscInt *ptr1,*ptr2;
1325 
1326   PetscFunctionBegin;
1327   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1328   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1329   if (sz1 > sz2) {
1330     *flg = PETSC_FALSE;
1331     PetscFunctionReturn(0);
1332   }
1333 
1334   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1335   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1336 
1337   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1338   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1339   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1340   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1341   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1342   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1343 
1344   nmatch=0;
1345   k     = 0;
1346   for (i=0; i<sz1; i++){
1347     for (j=k; j<sz2; j++){
1348       if (a1[i] == a2[j]) {
1349         k = j; nmatch++;
1350         break;
1351       }
1352     }
1353   }
1354   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1355   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1356   ierr = PetscFree(a1);CHKERRQ(ierr);
1357   ierr = PetscFree(a2);CHKERRQ(ierr);
1358   if (nmatch < sz1) {
1359     *flg = PETSC_FALSE;
1360   } else {
1361     *flg = PETSC_TRUE;
1362   }
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
1368 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1369 {
1370   PetscErrorCode ierr;
1371   IS             iscol_local;
1372   PetscInt       csize;
1373   PetscBool      isequal;
1374 
1375   PetscFunctionBegin;
1376   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1377   if (call == MAT_REUSE_MATRIX) {
1378     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1379     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1380   } else {
1381     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1382     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1383     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1384   }
1385 
1386   /* now call MatGetSubMatrix_MPIBAIJ() */
1387   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1388   if (call == MAT_INITIAL_MATRIX) {
1389     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1390     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1391   }
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1397 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1398 {
1399   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1400   PetscErrorCode ierr;
1401 
1402   PetscFunctionBegin;
1403   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1404   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1410 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1411 {
1412   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1413   Mat            A  = a->A,B = a->B;
1414   PetscErrorCode ierr;
1415   PetscReal      isend[5],irecv[5];
1416 
1417   PetscFunctionBegin;
1418   info->block_size = (PetscReal)matin->rmap->bs;
1419 
1420   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1421 
1422   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1423   isend[3] = info->memory;  isend[4] = info->mallocs;
1424 
1425   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1426 
1427   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1428   isend[3] += info->memory;  isend[4] += info->mallocs;
1429   if (flag == MAT_LOCAL) {
1430     info->nz_used      = isend[0];
1431     info->nz_allocated = isend[1];
1432     info->nz_unneeded  = isend[2];
1433     info->memory       = isend[3];
1434     info->mallocs      = isend[4];
1435   } else if (flag == MAT_GLOBAL_MAX) {
1436     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1437 
1438     info->nz_used      = irecv[0];
1439     info->nz_allocated = irecv[1];
1440     info->nz_unneeded  = irecv[2];
1441     info->memory       = irecv[3];
1442     info->mallocs      = irecv[4];
1443   } else if (flag == MAT_GLOBAL_SUM) {
1444     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1445 
1446     info->nz_used      = irecv[0];
1447     info->nz_allocated = irecv[1];
1448     info->nz_unneeded  = irecv[2];
1449     info->memory       = irecv[3];
1450     info->mallocs      = irecv[4];
1451   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1452   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1453   info->fill_ratio_needed = 0;
1454   info->factor_mallocs    = 0;
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1460 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1461 {
1462   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1463   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1464   PetscErrorCode ierr;
1465 
1466   PetscFunctionBegin;
1467   switch (op) {
1468   case MAT_NEW_NONZERO_LOCATIONS:
1469   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1470   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1471   case MAT_KEEP_NONZERO_PATTERN:
1472   case MAT_NEW_NONZERO_LOCATION_ERR:
1473     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1474     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1475     break;
1476   case MAT_ROW_ORIENTED:
1477     a->roworiented = flg;
1478 
1479     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1480     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1481     break;
1482   case MAT_NEW_DIAGONALS:
1483     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1484     break;
1485   case MAT_IGNORE_OFF_PROC_ENTRIES:
1486     a->donotstash = flg;
1487     break;
1488   case MAT_USE_HASH_TABLE:
1489     a->ht_flag = flg;
1490     break;
1491   case MAT_HERMITIAN:
1492     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1493     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1494 
1495     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1496     break;
1497   case MAT_SPD:
1498     A->spd_set = PETSC_TRUE;
1499     A->spd     = flg;
1500     if (flg) {
1501       A->symmetric                  = PETSC_TRUE;
1502       A->structurally_symmetric     = PETSC_TRUE;
1503       A->symmetric_set              = PETSC_TRUE;
1504       A->structurally_symmetric_set = PETSC_TRUE;
1505     }
1506     break;
1507   case MAT_SYMMETRIC:
1508     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1509     break;
1510   case MAT_STRUCTURALLY_SYMMETRIC:
1511     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1512     break;
1513   case MAT_SYMMETRY_ETERNAL:
1514     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1515     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1516     break;
1517   case MAT_IGNORE_LOWER_TRIANGULAR:
1518     aA->ignore_ltriangular = flg;
1519     break;
1520   case MAT_ERROR_LOWER_TRIANGULAR:
1521     aA->ignore_ltriangular = flg;
1522     break;
1523   case MAT_GETROW_UPPERTRIANGULAR:
1524     aA->getrow_utriangular = flg;
1525     break;
1526   default:
1527     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1528   }
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1534 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1535 {
1536   PetscErrorCode ierr;
1537 
1538   PetscFunctionBegin;
1539   if (MAT_INITIAL_MATRIX || *B != A) {
1540     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1541   }
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 #undef __FUNCT__
1546 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1547 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1548 {
1549   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1550   Mat            a     = baij->A, b=baij->B;
1551   PetscErrorCode ierr;
1552   PetscInt       nv,m,n;
1553   PetscBool      flg;
1554 
1555   PetscFunctionBegin;
1556   if (ll != rr) {
1557     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1558     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1559   }
1560   if (!ll) PetscFunctionReturn(0);
1561 
1562   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1563   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1564 
1565   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1566   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1567 
1568   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569 
1570   /* left diagonalscale the off-diagonal part */
1571   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1572 
1573   /* scale the diagonal part */
1574   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1575 
1576   /* right diagonalscale the off-diagonal part */
1577   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1578   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1584 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1585 {
1586   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1587   PetscErrorCode ierr;
1588 
1589   PetscFunctionBegin;
1590   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1591   PetscFunctionReturn(0);
1592 }
1593 
1594 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1595 
1596 #undef __FUNCT__
1597 #define __FUNCT__ "MatEqual_MPISBAIJ"
1598 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1599 {
1600   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1601   Mat            a,b,c,d;
1602   PetscBool      flg;
1603   PetscErrorCode ierr;
1604 
1605   PetscFunctionBegin;
1606   a = matA->A; b = matA->B;
1607   c = matB->A; d = matB->B;
1608 
1609   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1610   if (flg) {
1611     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1612   }
1613   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatCopy_MPISBAIJ"
1619 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1620 {
1621   PetscErrorCode ierr;
1622   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1623   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1624 
1625   PetscFunctionBegin;
1626   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1627   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1628     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1629     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1630     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1631   } else {
1632     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1633     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1634   }
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1640 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1641 {
1642   PetscErrorCode ierr;
1643 
1644   PetscFunctionBegin;
1645   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1646   PetscFunctionReturn(0);
1647 }
1648 
1649 #undef __FUNCT__
1650 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1651 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1652 {
1653   PetscErrorCode ierr;
1654   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1655   PetscBLASInt   bnz,one=1;
1656   Mat_SeqSBAIJ   *xa,*ya;
1657   Mat_SeqBAIJ    *xb,*yb;
1658 
1659   PetscFunctionBegin;
1660   if (str == SAME_NONZERO_PATTERN) {
1661     PetscScalar alpha = a;
1662     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1663     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1664     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1665     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1666     xb   = (Mat_SeqBAIJ*)xx->B->data;
1667     yb   = (Mat_SeqBAIJ*)yy->B->data;
1668     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1669     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1670     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1671   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1672     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1673     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1674     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1675   } else {
1676     Mat      B;
1677     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1678     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1679     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1680     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1681     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1682     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1683     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1684     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1685     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1686     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1687     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1688     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1689     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1690     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1691     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1692     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1693     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1694     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1695     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1696     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1697   }
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1703 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1704 {
1705   PetscErrorCode ierr;
1706   PetscInt       i;
1707   PetscBool      flg,sorted;
1708 
1709   PetscFunctionBegin;
1710   for (i = 0; i < n; i++) {
1711     ierr = ISSorted(irow[i],&sorted);CHKERRQ(ierr);
1712     if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row index set %d not sorted",i);
1713     ierr = ISSorted(icol[i],&sorted);CHKERRQ(ierr);
1714     if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Column index set %d not sorted",i);
1715   }
1716   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1717   for (i=0; i<n; i++) {
1718     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1719     if (!flg) { /* *B[i] is non-symmetric, set flag */
1720       ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1721     }
1722   }
1723   PetscFunctionReturn(0);
1724 }
1725 
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatShift_MPISBAIJ"
1728 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1729 {
1730   PetscErrorCode ierr;
1731   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1732   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1733 
1734   PetscFunctionBegin;
1735   if (!Y->preallocated) {
1736     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1737   } else if (!aij->nz) {
1738     PetscInt nonew = aij->nonew;
1739     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1740     aij->nonew = nonew;
1741   }
1742   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 /* -------------------------------------------------------------------*/
1747 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1748                                        MatGetRow_MPISBAIJ,
1749                                        MatRestoreRow_MPISBAIJ,
1750                                        MatMult_MPISBAIJ,
1751                                /*  4*/ MatMultAdd_MPISBAIJ,
1752                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1753                                        MatMultAdd_MPISBAIJ,
1754                                        0,
1755                                        0,
1756                                        0,
1757                                /* 10*/ 0,
1758                                        0,
1759                                        0,
1760                                        MatSOR_MPISBAIJ,
1761                                        MatTranspose_MPISBAIJ,
1762                                /* 15*/ MatGetInfo_MPISBAIJ,
1763                                        MatEqual_MPISBAIJ,
1764                                        MatGetDiagonal_MPISBAIJ,
1765                                        MatDiagonalScale_MPISBAIJ,
1766                                        MatNorm_MPISBAIJ,
1767                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1768                                        MatAssemblyEnd_MPISBAIJ,
1769                                        MatSetOption_MPISBAIJ,
1770                                        MatZeroEntries_MPISBAIJ,
1771                                /* 24*/ 0,
1772                                        0,
1773                                        0,
1774                                        0,
1775                                        0,
1776                                /* 29*/ MatSetUp_MPISBAIJ,
1777                                        0,
1778                                        0,
1779                                        0,
1780                                        0,
1781                                /* 34*/ MatDuplicate_MPISBAIJ,
1782                                        0,
1783                                        0,
1784                                        0,
1785                                        0,
1786                                /* 39*/ MatAXPY_MPISBAIJ,
1787                                        MatGetSubMatrices_MPISBAIJ,
1788                                        MatIncreaseOverlap_MPISBAIJ,
1789                                        MatGetValues_MPISBAIJ,
1790                                        MatCopy_MPISBAIJ,
1791                                /* 44*/ 0,
1792                                        MatScale_MPISBAIJ,
1793                                        MatShift_MPISBAIJ,
1794                                        0,
1795                                        0,
1796                                /* 49*/ 0,
1797                                        0,
1798                                        0,
1799                                        0,
1800                                        0,
1801                                /* 54*/ 0,
1802                                        0,
1803                                        MatSetUnfactored_MPISBAIJ,
1804                                        0,
1805                                        MatSetValuesBlocked_MPISBAIJ,
1806                                /* 59*/ MatGetSubMatrix_MPISBAIJ,
1807                                        0,
1808                                        0,
1809                                        0,
1810                                        0,
1811                                /* 64*/ 0,
1812                                        0,
1813                                        0,
1814                                        0,
1815                                        0,
1816                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1817                                        0,
1818                                        0,
1819                                        0,
1820                                        0,
1821                                /* 74*/ 0,
1822                                        0,
1823                                        0,
1824                                        0,
1825                                        0,
1826                                /* 79*/ 0,
1827                                        0,
1828                                        0,
1829                                        0,
1830                                        MatLoad_MPISBAIJ,
1831                                /* 84*/ 0,
1832                                        0,
1833                                        0,
1834                                        0,
1835                                        0,
1836                                /* 89*/ 0,
1837                                        0,
1838                                        0,
1839                                        0,
1840                                        0,
1841                                /* 94*/ 0,
1842                                        0,
1843                                        0,
1844                                        0,
1845                                        0,
1846                                /* 99*/ 0,
1847                                        0,
1848                                        0,
1849                                        0,
1850                                        0,
1851                                /*104*/ 0,
1852                                        MatRealPart_MPISBAIJ,
1853                                        MatImaginaryPart_MPISBAIJ,
1854                                        MatGetRowUpperTriangular_MPISBAIJ,
1855                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1856                                /*109*/ 0,
1857                                        0,
1858                                        0,
1859                                        0,
1860                                        0,
1861                                /*114*/ 0,
1862                                        0,
1863                                        0,
1864                                        0,
1865                                        0,
1866                                /*119*/ 0,
1867                                        0,
1868                                        0,
1869                                        0,
1870                                        0,
1871                                /*124*/ 0,
1872                                        0,
1873                                        0,
1874                                        0,
1875                                        0,
1876                                /*129*/ 0,
1877                                        0,
1878                                        0,
1879                                        0,
1880                                        0,
1881                                /*134*/ 0,
1882                                        0,
1883                                        0,
1884                                        0,
1885                                        0,
1886                                /*139*/ 0,
1887                                        0,
1888                                        0,
1889                                        0,
1890                                        0,
1891                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1892 };
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1896 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1897 {
1898   PetscFunctionBegin;
1899   *a = ((Mat_MPISBAIJ*)A->data)->A;
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1905 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1906 {
1907   Mat_MPISBAIJ   *b;
1908   PetscErrorCode ierr;
1909   PetscInt       i,mbs,Mbs;
1910 
1911   PetscFunctionBegin;
1912   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1913   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1914   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1915   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1916 
1917   b   = (Mat_MPISBAIJ*)B->data;
1918   mbs = B->rmap->n/bs;
1919   Mbs = B->rmap->N/bs;
1920   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1921 
1922   B->rmap->bs = bs;
1923   b->bs2      = bs*bs;
1924   b->mbs      = mbs;
1925   b->Mbs      = Mbs;
1926   b->nbs      = B->cmap->n/bs;
1927   b->Nbs      = B->cmap->N/bs;
1928 
1929   for (i=0; i<=b->size; i++) {
1930     b->rangebs[i] = B->rmap->range[i]/bs;
1931   }
1932   b->rstartbs = B->rmap->rstart/bs;
1933   b->rendbs   = B->rmap->rend/bs;
1934 
1935   b->cstartbs = B->cmap->rstart/bs;
1936   b->cendbs   = B->cmap->rend/bs;
1937 
1938   if (!B->preallocated) {
1939     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1940     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1941     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1942     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1943     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1944     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1945     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1946     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1947     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1948   }
1949 
1950   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1951   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1952 
1953   B->preallocated = PETSC_TRUE;
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1959 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1960 {
1961   PetscInt       m,rstart,cstart,cend;
1962   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1963   const PetscInt *JJ    =0;
1964   PetscScalar    *values=0;
1965   PetscErrorCode ierr;
1966 
1967   PetscFunctionBegin;
1968   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1969   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1970   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1971   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1972   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1973   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1974   m      = B->rmap->n/bs;
1975   rstart = B->rmap->rstart/bs;
1976   cstart = B->cmap->rstart/bs;
1977   cend   = B->cmap->rend/bs;
1978 
1979   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1980   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1981   for (i=0; i<m; i++) {
1982     nz = ii[i+1] - ii[i];
1983     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1984     nz_max = PetscMax(nz_max,nz);
1985     JJ     = jj + ii[i];
1986     for (j=0; j<nz; j++) {
1987       if (*JJ >= cstart) break;
1988       JJ++;
1989     }
1990     d = 0;
1991     for (; j<nz; j++) {
1992       if (*JJ++ >= cend) break;
1993       d++;
1994     }
1995     d_nnz[i] = d;
1996     o_nnz[i] = nz - d;
1997   }
1998   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1999   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2000 
2001   values = (PetscScalar*)V;
2002   if (!values) {
2003     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2004     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2005   }
2006   for (i=0; i<m; i++) {
2007     PetscInt          row    = i + rstart;
2008     PetscInt          ncols  = ii[i+1] - ii[i];
2009     const PetscInt    *icols = jj + ii[i];
2010     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2011     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2012   }
2013 
2014   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2015   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2016   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2017   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 /*MC
2022    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2023    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2024    the matrix is stored.
2025 
2026   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2027   can call MatSetOption(Mat, MAT_HERMITIAN);
2028 
2029    Options Database Keys:
2030 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2031 
2032   Level: beginner
2033 
2034 .seealso: MatCreateMPISBAIJ
2035 M*/
2036 
2037 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2038 
2039 #undef __FUNCT__
2040 #define __FUNCT__ "MatCreate_MPISBAIJ"
2041 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2042 {
2043   Mat_MPISBAIJ   *b;
2044   PetscErrorCode ierr;
2045   PetscBool      flg = PETSC_FALSE;
2046 
2047   PetscFunctionBegin;
2048   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2049   B->data = (void*)b;
2050   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2051 
2052   B->ops->destroy = MatDestroy_MPISBAIJ;
2053   B->ops->view    = MatView_MPISBAIJ;
2054   B->assembled    = PETSC_FALSE;
2055   B->insertmode   = NOT_SET_VALUES;
2056 
2057   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2058   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2059 
2060   /* build local table of row and column ownerships */
2061   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2062 
2063   /* build cache for off array entries formed */
2064   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2065 
2066   b->donotstash  = PETSC_FALSE;
2067   b->colmap      = NULL;
2068   b->garray      = NULL;
2069   b->roworiented = PETSC_TRUE;
2070 
2071   /* stuff used in block assembly */
2072   b->barray = 0;
2073 
2074   /* stuff used for matrix vector multiply */
2075   b->lvec    = 0;
2076   b->Mvctx   = 0;
2077   b->slvec0  = 0;
2078   b->slvec0b = 0;
2079   b->slvec1  = 0;
2080   b->slvec1a = 0;
2081   b->slvec1b = 0;
2082   b->sMvctx  = 0;
2083 
2084   /* stuff for MatGetRow() */
2085   b->rowindices   = 0;
2086   b->rowvalues    = 0;
2087   b->getrowactive = PETSC_FALSE;
2088 
2089   /* hash table stuff */
2090   b->ht           = 0;
2091   b->hd           = 0;
2092   b->ht_size      = 0;
2093   b->ht_flag      = PETSC_FALSE;
2094   b->ht_fact      = 0;
2095   b->ht_total_ct  = 0;
2096   b->ht_insert_ct = 0;
2097 
2098   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
2099   b->ijonly = PETSC_FALSE;
2100 
2101   b->in_loc = 0;
2102   b->v_loc  = 0;
2103   b->n_loc  = 0;
2104 
2105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
2108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
2111 #if defined(PETSC_HAVE_ELEMENTAL)
2112   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2113 #endif
2114 
2115   B->symmetric                  = PETSC_TRUE;
2116   B->structurally_symmetric     = PETSC_TRUE;
2117   B->symmetric_set              = PETSC_TRUE;
2118   B->structurally_symmetric_set = PETSC_TRUE;
2119 
2120   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2121   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2122   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2123   if (flg) {
2124     PetscReal fact = 1.39;
2125     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2126     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2127     if (fact <= 1.0) fact = 1.39;
2128     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2129     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2130   }
2131   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 /*MC
2136    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2137 
2138    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2139    and MATMPISBAIJ otherwise.
2140 
2141    Options Database Keys:
2142 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2143 
2144   Level: beginner
2145 
2146 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2147 M*/
2148 
2149 #undef __FUNCT__
2150 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
2151 /*@C
2152    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2153    the user should preallocate the matrix storage by setting the parameters
2154    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2155    performance can be increased by more than a factor of 50.
2156 
2157    Collective on Mat
2158 
2159    Input Parameters:
2160 +  B - the matrix
2161 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2162           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2163 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2164            submatrix  (same for all local rows)
2165 .  d_nnz - array containing the number of block nonzeros in the various block rows
2166            in the upper triangular and diagonal part of the in diagonal portion of the local
2167            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2168            for the diagonal entry and set a value even if it is zero.
2169 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2170            submatrix (same for all local rows).
2171 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2172            off-diagonal portion of the local submatrix that is right of the diagonal
2173            (possibly different for each block row) or NULL.
2174 
2175 
2176    Options Database Keys:
2177 .   -mat_no_unroll - uses code that does not unroll the loops in the
2178                      block calculations (much slower)
2179 .   -mat_block_size - size of the blocks to use
2180 
2181    Notes:
2182 
2183    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2184    than it must be used on all processors that share the object for that argument.
2185 
2186    If the *_nnz parameter is given then the *_nz parameter is ignored
2187 
2188    Storage Information:
2189    For a square global matrix we define each processor's diagonal portion
2190    to be its local rows and the corresponding columns (a square submatrix);
2191    each processor's off-diagonal portion encompasses the remainder of the
2192    local matrix (a rectangular submatrix).
2193 
2194    The user can specify preallocated storage for the diagonal part of
2195    the local submatrix with either d_nz or d_nnz (not both).  Set
2196    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2197    memory allocation.  Likewise, specify preallocated storage for the
2198    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2199 
2200    You can call MatGetInfo() to get information on how effective the preallocation was;
2201    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2202    You can also run with the option -info and look for messages with the string
2203    malloc in them to see if additional memory allocation was needed.
2204 
2205    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2206    the figure below we depict these three local rows and all columns (0-11).
2207 
2208 .vb
2209            0 1 2 3 4 5 6 7 8 9 10 11
2210           --------------------------
2211    row 3  |. . . d d d o o o o  o  o
2212    row 4  |. . . d d d o o o o  o  o
2213    row 5  |. . . d d d o o o o  o  o
2214           --------------------------
2215 .ve
2216 
2217    Thus, any entries in the d locations are stored in the d (diagonal)
2218    submatrix, and any entries in the o locations are stored in the
2219    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2220    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2221 
2222    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2223    plus the diagonal part of the d matrix,
2224    and o_nz should indicate the number of block nonzeros per row in the o matrix
2225 
2226    In general, for PDE problems in which most nonzeros are near the diagonal,
2227    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2228    or you will get TERRIBLE performance; see the users' manual chapter on
2229    matrices.
2230 
2231    Level: intermediate
2232 
2233 .keywords: matrix, block, aij, compressed row, sparse, parallel
2234 
2235 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2236 @*/
2237 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2238 {
2239   PetscErrorCode ierr;
2240 
2241   PetscFunctionBegin;
2242   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2243   PetscValidType(B,1);
2244   PetscValidLogicalCollectiveInt(B,bs,2);
2245   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2246   PetscFunctionReturn(0);
2247 }
2248 
2249 #undef __FUNCT__
2250 #define __FUNCT__ "MatCreateSBAIJ"
2251 /*@C
2252    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2253    (block compressed row).  For good matrix assembly performance
2254    the user should preallocate the matrix storage by setting the parameters
2255    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2256    performance can be increased by more than a factor of 50.
2257 
2258    Collective on MPI_Comm
2259 
2260    Input Parameters:
2261 +  comm - MPI communicator
2262 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2263           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2264 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2265            This value should be the same as the local size used in creating the
2266            y vector for the matrix-vector product y = Ax.
2267 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2268            This value should be the same as the local size used in creating the
2269            x vector for the matrix-vector product y = Ax.
2270 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2271 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2272 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2273            submatrix  (same for all local rows)
2274 .  d_nnz - array containing the number of block nonzeros in the various block rows
2275            in the upper triangular portion of the in diagonal portion of the local
2276            (possibly different for each block block row) or NULL.
2277            If you plan to factor the matrix you must leave room for the diagonal entry and
2278            set its value even if it is zero.
2279 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2280            submatrix (same for all local rows).
2281 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2282            off-diagonal portion of the local submatrix (possibly different for
2283            each block row) or NULL.
2284 
2285    Output Parameter:
2286 .  A - the matrix
2287 
2288    Options Database Keys:
2289 .   -mat_no_unroll - uses code that does not unroll the loops in the
2290                      block calculations (much slower)
2291 .   -mat_block_size - size of the blocks to use
2292 .   -mat_mpi - use the parallel matrix data structures even on one processor
2293                (defaults to using SeqBAIJ format on one processor)
2294 
2295    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2296    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2297    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2298 
2299    Notes:
2300    The number of rows and columns must be divisible by blocksize.
2301    This matrix type does not support complex Hermitian operation.
2302 
2303    The user MUST specify either the local or global matrix dimensions
2304    (possibly both).
2305 
2306    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2307    than it must be used on all processors that share the object for that argument.
2308 
2309    If the *_nnz parameter is given then the *_nz parameter is ignored
2310 
2311    Storage Information:
2312    For a square global matrix we define each processor's diagonal portion
2313    to be its local rows and the corresponding columns (a square submatrix);
2314    each processor's off-diagonal portion encompasses the remainder of the
2315    local matrix (a rectangular submatrix).
2316 
2317    The user can specify preallocated storage for the diagonal part of
2318    the local submatrix with either d_nz or d_nnz (not both).  Set
2319    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2320    memory allocation.  Likewise, specify preallocated storage for the
2321    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2322 
2323    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2324    the figure below we depict these three local rows and all columns (0-11).
2325 
2326 .vb
2327            0 1 2 3 4 5 6 7 8 9 10 11
2328           --------------------------
2329    row 3  |. . . d d d o o o o  o  o
2330    row 4  |. . . d d d o o o o  o  o
2331    row 5  |. . . d d d o o o o  o  o
2332           --------------------------
2333 .ve
2334 
2335    Thus, any entries in the d locations are stored in the d (diagonal)
2336    submatrix, and any entries in the o locations are stored in the
2337    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2338    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2339 
2340    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2341    plus the diagonal part of the d matrix,
2342    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2343    In general, for PDE problems in which most nonzeros are near the diagonal,
2344    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2345    or you will get TERRIBLE performance; see the users' manual chapter on
2346    matrices.
2347 
2348    Level: intermediate
2349 
2350 .keywords: matrix, block, aij, compressed row, sparse, parallel
2351 
2352 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2353 @*/
2354 
2355 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2356 {
2357   PetscErrorCode ierr;
2358   PetscMPIInt    size;
2359 
2360   PetscFunctionBegin;
2361   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2362   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2363   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2364   if (size > 1) {
2365     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2366     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2367   } else {
2368     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2369     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2370   }
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 
2375 #undef __FUNCT__
2376 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2377 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2378 {
2379   Mat            mat;
2380   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2381   PetscErrorCode ierr;
2382   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2383   PetscScalar    *array;
2384 
2385   PetscFunctionBegin;
2386   *newmat = 0;
2387 
2388   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2389   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2390   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2391   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2392   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2393   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2394 
2395   mat->factortype   = matin->factortype;
2396   mat->preallocated = PETSC_TRUE;
2397   mat->assembled    = PETSC_TRUE;
2398   mat->insertmode   = NOT_SET_VALUES;
2399 
2400   a      = (Mat_MPISBAIJ*)mat->data;
2401   a->bs2 = oldmat->bs2;
2402   a->mbs = oldmat->mbs;
2403   a->nbs = oldmat->nbs;
2404   a->Mbs = oldmat->Mbs;
2405   a->Nbs = oldmat->Nbs;
2406 
2407 
2408   a->size         = oldmat->size;
2409   a->rank         = oldmat->rank;
2410   a->donotstash   = oldmat->donotstash;
2411   a->roworiented  = oldmat->roworiented;
2412   a->rowindices   = 0;
2413   a->rowvalues    = 0;
2414   a->getrowactive = PETSC_FALSE;
2415   a->barray       = 0;
2416   a->rstartbs     = oldmat->rstartbs;
2417   a->rendbs       = oldmat->rendbs;
2418   a->cstartbs     = oldmat->cstartbs;
2419   a->cendbs       = oldmat->cendbs;
2420 
2421   /* hash table stuff */
2422   a->ht           = 0;
2423   a->hd           = 0;
2424   a->ht_size      = 0;
2425   a->ht_flag      = oldmat->ht_flag;
2426   a->ht_fact      = oldmat->ht_fact;
2427   a->ht_total_ct  = 0;
2428   a->ht_insert_ct = 0;
2429 
2430   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2431   if (oldmat->colmap) {
2432 #if defined(PETSC_USE_CTABLE)
2433     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2434 #else
2435     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2436     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2437     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2438 #endif
2439   } else a->colmap = 0;
2440 
2441   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2442     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2443     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2444     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2445   } else a->garray = 0;
2446 
2447   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2448   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2449   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2450   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2451   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2452 
2453   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2454   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2455   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2456   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2457 
2458   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2459   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2460   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2461   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2462   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2463   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2464   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2465   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2466   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2467   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2468   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2469   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2470   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2471 
2472   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2473   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2474   a->sMvctx = oldmat->sMvctx;
2475   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2476 
2477   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2478   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2479   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2480   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2481   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2482   *newmat = mat;
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatLoad_MPISBAIJ"
2488 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2489 {
2490   PetscErrorCode ierr;
2491   PetscInt       i,nz,j,rstart,rend;
2492   PetscScalar    *vals,*buf;
2493   MPI_Comm       comm;
2494   MPI_Status     status;
2495   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2496   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2497   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2498   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2499   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2500   PetscInt       dcount,kmax,k,nzcount,tmp;
2501   int            fd;
2502 
2503   PetscFunctionBegin;
2504   /* force binary viewer to load .info file if it has not yet done so */
2505   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2506   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2507   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2508   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2509   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2510   if (bs < 0) bs = 1;
2511 
2512   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2513   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2514   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2515   if (!rank) {
2516     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2517     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2518     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2519   }
2520 
2521   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2522   M    = header[1];
2523   N    = header[2];
2524 
2525   /* If global sizes are set, check if they are consistent with that given in the file */
2526   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2527   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2528 
2529   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2530 
2531   /*
2532      This code adds extra rows to make sure the number of rows is
2533      divisible by the blocksize
2534   */
2535   Mbs        = M/bs;
2536   extra_rows = bs - M + bs*(Mbs);
2537   if (extra_rows == bs) extra_rows = 0;
2538   else                  Mbs++;
2539   if (extra_rows &&!rank) {
2540     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2541   }
2542 
2543   /* determine ownership of all rows */
2544   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2545     mbs = Mbs/size + ((Mbs % size) > rank);
2546     m   = mbs*bs;
2547   } else { /* User Set */
2548     m   = newmat->rmap->n;
2549     mbs = m/bs;
2550   }
2551   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2552   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2553   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2554   rowners[0] = 0;
2555   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2556   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2557   rstart = rowners[rank];
2558   rend   = rowners[rank+1];
2559 
2560   /* distribute row lengths to all processors */
2561   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2562   if (!rank) {
2563     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2564     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2565     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2566     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2567     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2568     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2569     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2570   } else {
2571     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2572   }
2573 
2574   if (!rank) {   /* procs[0] */
2575     /* calculate the number of nonzeros on each processor */
2576     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2577     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2578     for (i=0; i<size; i++) {
2579       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2580         procsnz[i] += rowlengths[j];
2581       }
2582     }
2583     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2584 
2585     /* determine max buffer needed and allocate it */
2586     maxnz = 0;
2587     for (i=0; i<size; i++) {
2588       maxnz = PetscMax(maxnz,procsnz[i]);
2589     }
2590     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2591 
2592     /* read in my part of the matrix column indices  */
2593     nz     = procsnz[0];
2594     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2595     mycols = ibuf;
2596     if (size == 1) nz -= extra_rows;
2597     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2598     if (size == 1) {
2599       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2600     }
2601 
2602     /* read in every ones (except the last) and ship off */
2603     for (i=1; i<size-1; i++) {
2604       nz   = procsnz[i];
2605       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2606       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2607     }
2608     /* read in the stuff for the last proc */
2609     if (size != 1) {
2610       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2611       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2612       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2613       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2614     }
2615     ierr = PetscFree(cols);CHKERRQ(ierr);
2616   } else {  /* procs[i], i>0 */
2617     /* determine buffer space needed for message */
2618     nz = 0;
2619     for (i=0; i<m; i++) nz += locrowlens[i];
2620     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2621     mycols = ibuf;
2622     /* receive message of column indices*/
2623     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2624     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2625     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2626   }
2627 
2628   /* loop over local rows, determining number of off diagonal entries */
2629   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2630   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2631   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2632   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2633   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2634   rowcount = 0;
2635   nzcount  = 0;
2636   for (i=0; i<mbs; i++) {
2637     dcount  = 0;
2638     odcount = 0;
2639     for (j=0; j<bs; j++) {
2640       kmax = locrowlens[rowcount];
2641       for (k=0; k<kmax; k++) {
2642         tmp = mycols[nzcount++]/bs; /* block col. index */
2643         if (!mask[tmp]) {
2644           mask[tmp] = 1;
2645           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2646           else masked1[dcount++] = tmp; /* entry in diag portion */
2647         }
2648       }
2649       rowcount++;
2650     }
2651 
2652     dlens[i]  = dcount;  /* d_nzz[i] */
2653     odlens[i] = odcount; /* o_nzz[i] */
2654 
2655     /* zero out the mask elements we set */
2656     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2657     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2658   }
2659   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2660   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2661   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2662 
2663   if (!rank) {
2664     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2665     /* read in my part of the matrix numerical values  */
2666     nz     = procsnz[0];
2667     vals   = buf;
2668     mycols = ibuf;
2669     if (size == 1) nz -= extra_rows;
2670     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2671     if (size == 1) {
2672       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2673     }
2674 
2675     /* insert into matrix */
2676     jj = rstart*bs;
2677     for (i=0; i<m; i++) {
2678       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2679       mycols += locrowlens[i];
2680       vals   += locrowlens[i];
2681       jj++;
2682     }
2683 
2684     /* read in other processors (except the last one) and ship out */
2685     for (i=1; i<size-1; i++) {
2686       nz   = procsnz[i];
2687       vals = buf;
2688       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2689       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2690     }
2691     /* the last proc */
2692     if (size != 1) {
2693       nz   = procsnz[i] - extra_rows;
2694       vals = buf;
2695       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2696       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2697       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2698     }
2699     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2700 
2701   } else {
2702     /* receive numeric values */
2703     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2704 
2705     /* receive message of values*/
2706     vals   = buf;
2707     mycols = ibuf;
2708     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2709     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2710     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2711 
2712     /* insert into matrix */
2713     jj = rstart*bs;
2714     for (i=0; i<m; i++) {
2715       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2716       mycols += locrowlens[i];
2717       vals   += locrowlens[i];
2718       jj++;
2719     }
2720   }
2721 
2722   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2723   ierr = PetscFree(buf);CHKERRQ(ierr);
2724   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2725   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2726   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2727   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2728   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2729   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2730   PetscFunctionReturn(0);
2731 }
2732 
2733 #undef __FUNCT__
2734 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2735 /*XXXXX@
2736    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2737 
2738    Input Parameters:
2739 .  mat  - the matrix
2740 .  fact - factor
2741 
2742    Not Collective on Mat, each process can have a different hash factor
2743 
2744    Level: advanced
2745 
2746   Notes:
2747    This can also be set by the command line option: -mat_use_hash_table fact
2748 
2749 .keywords: matrix, hashtable, factor, HT
2750 
2751 .seealso: MatSetOption()
2752 @XXXXX*/
2753 
2754 
2755 #undef __FUNCT__
2756 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2757 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2758 {
2759   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2760   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2761   PetscReal      atmp;
2762   PetscReal      *work,*svalues,*rvalues;
2763   PetscErrorCode ierr;
2764   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2765   PetscMPIInt    rank,size;
2766   PetscInt       *rowners_bs,dest,count,source;
2767   PetscScalar    *va;
2768   MatScalar      *ba;
2769   MPI_Status     stat;
2770 
2771   PetscFunctionBegin;
2772   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2773   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2774   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2775 
2776   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2777   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2778 
2779   bs  = A->rmap->bs;
2780   mbs = a->mbs;
2781   Mbs = a->Mbs;
2782   ba  = b->a;
2783   bi  = b->i;
2784   bj  = b->j;
2785 
2786   /* find ownerships */
2787   rowners_bs = A->rmap->range;
2788 
2789   /* each proc creates an array to be distributed */
2790   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2791   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2792 
2793   /* row_max for B */
2794   if (rank != size-1) {
2795     for (i=0; i<mbs; i++) {
2796       ncols = bi[1] - bi[0]; bi++;
2797       brow  = bs*i;
2798       for (j=0; j<ncols; j++) {
2799         bcol = bs*(*bj);
2800         for (kcol=0; kcol<bs; kcol++) {
2801           col  = bcol + kcol;                /* local col index */
2802           col += rowners_bs[rank+1];      /* global col index */
2803           for (krow=0; krow<bs; krow++) {
2804             atmp = PetscAbsScalar(*ba); ba++;
2805             row  = brow + krow;   /* local row index */
2806             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2807             if (work[col] < atmp) work[col] = atmp;
2808           }
2809         }
2810         bj++;
2811       }
2812     }
2813 
2814     /* send values to its owners */
2815     for (dest=rank+1; dest<size; dest++) {
2816       svalues = work + rowners_bs[dest];
2817       count   = rowners_bs[dest+1]-rowners_bs[dest];
2818       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2819     }
2820   }
2821 
2822   /* receive values */
2823   if (rank) {
2824     rvalues = work;
2825     count   = rowners_bs[rank+1]-rowners_bs[rank];
2826     for (source=0; source<rank; source++) {
2827       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2828       /* process values */
2829       for (i=0; i<count; i++) {
2830         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2831       }
2832     }
2833   }
2834 
2835   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2836   ierr = PetscFree(work);CHKERRQ(ierr);
2837   PetscFunctionReturn(0);
2838 }
2839 
2840 #undef __FUNCT__
2841 #define __FUNCT__ "MatSOR_MPISBAIJ"
2842 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2843 {
2844   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2845   PetscErrorCode    ierr;
2846   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2847   PetscScalar       *x,*ptr,*from;
2848   Vec               bb1;
2849   const PetscScalar *b;
2850 
2851   PetscFunctionBegin;
2852   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2853   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2854 
2855   if (flag == SOR_APPLY_UPPER) {
2856     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2857     PetscFunctionReturn(0);
2858   }
2859 
2860   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2861     if (flag & SOR_ZERO_INITIAL_GUESS) {
2862       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2863       its--;
2864     }
2865 
2866     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2867     while (its--) {
2868 
2869       /* lower triangular part: slvec0b = - B^T*xx */
2870       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2871 
2872       /* copy xx into slvec0a */
2873       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2874       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2875       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2876       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2877 
2878       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2879 
2880       /* copy bb into slvec1a */
2881       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2882       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2883       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2884       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2885 
2886       /* set slvec1b = 0 */
2887       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2888 
2889       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2890       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2891       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2892       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2893 
2894       /* upper triangular part: bb1 = bb1 - B*x */
2895       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2896 
2897       /* local diagonal sweep */
2898       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2899     }
2900     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2901   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2902     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2903   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2904     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2905   } else if (flag & SOR_EISENSTAT) {
2906     Vec               xx1;
2907     PetscBool         hasop;
2908     const PetscScalar *diag;
2909     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2910     PetscInt          i,n;
2911 
2912     if (!mat->xx1) {
2913       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2914       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2915     }
2916     xx1 = mat->xx1;
2917     bb1 = mat->bb1;
2918 
2919     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2920 
2921     if (!mat->diag) {
2922       /* this is wrong for same matrix with new nonzero values */
2923       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2924       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2925     }
2926     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2927 
2928     if (hasop) {
2929       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2930       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2931     } else {
2932       /*
2933           These two lines are replaced by code that may be a bit faster for a good compiler
2934       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2935       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2936       */
2937       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2938       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2939       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2940       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2941       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2942       if (omega == 1.0) {
2943         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2944         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2945       } else {
2946         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2947         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2948       }
2949       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2950       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2951       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2952       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2953     }
2954 
2955     /* multiply off-diagonal portion of matrix */
2956     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2957     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2958     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2959     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2960     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2961     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2962     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2963     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2964     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2965     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2966 
2967     /* local sweep */
2968     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2969     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2970   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2971   PetscFunctionReturn(0);
2972 }
2973 
2974 #undef __FUNCT__
2975 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2976 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2977 {
2978   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2979   PetscErrorCode ierr;
2980   Vec            lvec1,bb1;
2981 
2982   PetscFunctionBegin;
2983   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2984   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2985 
2986   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2987     if (flag & SOR_ZERO_INITIAL_GUESS) {
2988       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2989       its--;
2990     }
2991 
2992     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2993     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2994     while (its--) {
2995       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2996 
2997       /* lower diagonal part: bb1 = bb - B^T*xx */
2998       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2999       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3000 
3001       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3002       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3003       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3004 
3005       /* upper diagonal part: bb1 = bb1 - B*x */
3006       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3007       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3008 
3009       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3010 
3011       /* diagonal sweep */
3012       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3013     }
3014     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3015     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3016   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3017   PetscFunctionReturn(0);
3018 }
3019 
3020 #undef __FUNCT__
3021 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
3022 /*@
3023      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
3024          CSR format the local rows.
3025 
3026    Collective on MPI_Comm
3027 
3028    Input Parameters:
3029 +  comm - MPI communicator
3030 .  bs - the block size, only a block size of 1 is supported
3031 .  m - number of local rows (Cannot be PETSC_DECIDE)
3032 .  n - This value should be the same as the local size used in creating the
3033        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3034        calculated if N is given) For square matrices n is almost always m.
3035 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3036 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3037 .   i - row indices
3038 .   j - column indices
3039 -   a - matrix values
3040 
3041    Output Parameter:
3042 .   mat - the matrix
3043 
3044    Level: intermediate
3045 
3046    Notes:
3047        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3048      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3049      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3050 
3051        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3052 
3053 .keywords: matrix, aij, compressed row, sparse, parallel
3054 
3055 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3056           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3057 @*/
3058 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3059 {
3060   PetscErrorCode ierr;
3061 
3062 
3063   PetscFunctionBegin;
3064   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3065   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3066   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3067   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3068   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3069   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3070   PetscFunctionReturn(0);
3071 }
3072 
3073 
3074 #undef __FUNCT__
3075 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
3076 /*@C
3077    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
3078    (the default parallel PETSc format).
3079 
3080    Collective on MPI_Comm
3081 
3082    Input Parameters:
3083 +  B - the matrix
3084 .  bs - the block size
3085 .  i - the indices into j for the start of each local row (starts with zero)
3086 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3087 -  v - optional values in the matrix
3088 
3089    Level: developer
3090 
3091 .keywords: matrix, aij, compressed row, sparse, parallel
3092 
3093 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3094 @*/
3095 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3096 {
3097   PetscErrorCode ierr;
3098 
3099   PetscFunctionBegin;
3100   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3101   PetscFunctionReturn(0);
3102 }
3103 
3104 #undef __FUNCT__
3105 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
3106 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3107 {
3108   PetscErrorCode ierr;
3109   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3110   PetscInt       *indx;
3111   PetscScalar    *values;
3112 
3113   PetscFunctionBegin;
3114   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3115   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3116     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3117     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3118     PetscInt       *bindx,rmax=a->rmax,j;
3119 
3120     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3121     mbs = m/bs; Nbs = N/cbs;
3122     if (n == PETSC_DECIDE) {
3123       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3124     }
3125     /* Check sum(n) = Nbs */
3126     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3127     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
3128 
3129     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3130     rstart -= mbs;
3131 
3132     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3133     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3134     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3135     for (i=0; i<mbs; i++) {
3136       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3137       nnz = nnz/bs;
3138       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3139       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3140       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3141     }
3142     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3143     ierr = PetscFree(bindx);CHKERRQ(ierr);
3144 
3145     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3146     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3147     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3148     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3149     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3150     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3151   }
3152 
3153   /* numeric phase */
3154   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3155   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3156 
3157   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3158   for (i=0; i<m; i++) {
3159     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3160     Ii   = i + rstart;
3161     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3162     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3163   }
3164   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3165   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3166   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3167   PetscFunctionReturn(0);
3168 }
3169