xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision afebec48acb8b1040c5bfa54d0857ffcc0c220e6)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #undef __FUNCT__
11 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
12 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
19   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
20   PetscFunctionReturn(0);
21 }
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
25 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
32   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
36 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
37   { \
38  \
39     brow = row/bs;  \
40     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
41     rmax = aimax[brow]; nrow = ailen[brow]; \
42     bcol = col/bs; \
43     ridx = row % bs; cidx = col % bs; \
44     low  = 0; high = nrow; \
45     while (high-low > 3) { \
46       t = (low+high)/2; \
47       if (rp[t] > bcol) high = t; \
48       else              low  = t; \
49     } \
50     for (_i=low; _i<high; _i++) { \
51       if (rp[_i] > bcol) break; \
52       if (rp[_i] == bcol) { \
53         bap = ap + bs2*_i + bs*cidx + ridx; \
54         if (addv == ADD_VALUES) *bap += value;  \
55         else                    *bap  = value;  \
56         goto a_noinsert; \
57       } \
58     } \
59     if (a->nonew == 1) goto a_noinsert; \
60     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
61     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
62     N = nrow++ - 1;  \
63     /* shift up all the later entries in this row */ \
64     for (ii=N; ii>=_i; ii--) { \
65       rp[ii+1] = rp[ii]; \
66       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
67     } \
68     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
69     rp[_i]                      = bcol;  \
70     ap[bs2*_i + bs*cidx + ridx] = value;  \
71     A->nonzerostate++;\
72 a_noinsert:; \
73     ailen[brow] = nrow; \
74   }
75 
76 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
77   { \
78     brow = row/bs;  \
79     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
80     rmax = bimax[brow]; nrow = bilen[brow]; \
81     bcol = col/bs; \
82     ridx = row % bs; cidx = col % bs; \
83     low  = 0; high = nrow; \
84     while (high-low > 3) { \
85       t = (low+high)/2; \
86       if (rp[t] > bcol) high = t; \
87       else              low  = t; \
88     } \
89     for (_i=low; _i<high; _i++) { \
90       if (rp[_i] > bcol) break; \
91       if (rp[_i] == bcol) { \
92         bap = ap + bs2*_i + bs*cidx + ridx; \
93         if (addv == ADD_VALUES) *bap += value;  \
94         else                    *bap  = value;  \
95         goto b_noinsert; \
96       } \
97     } \
98     if (b->nonew == 1) goto b_noinsert; \
99     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
100     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
101     N = nrow++ - 1;  \
102     /* shift up all the later entries in this row */ \
103     for (ii=N; ii>=_i; ii--) { \
104       rp[ii+1] = rp[ii]; \
105       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
106     } \
107     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
108     rp[_i]                      = bcol;  \
109     ap[bs2*_i + bs*cidx + ridx] = value;  \
110     B->nonzerostate++;\
111 b_noinsert:; \
112     bilen[brow] = nrow; \
113   }
114 
115 /* Only add/insert a(i,j) with i<=j (blocks).
116    Any a(i,j) with i>j input by user is ingored.
117 */
118 #undef __FUNCT__
119 #define __FUNCT__ "MatSetValues_MPISBAIJ"
120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
121 {
122   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
123   MatScalar      value;
124   PetscBool      roworiented = baij->roworiented;
125   PetscErrorCode ierr;
126   PetscInt       i,j,row,col;
127   PetscInt       rstart_orig=mat->rmap->rstart;
128   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
129   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
130 
131   /* Some Variables required in the macro */
132   Mat          A     = baij->A;
133   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
134   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
135   MatScalar    *aa   =a->a;
136 
137   Mat         B     = baij->B;
138   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
139   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
140   MatScalar   *ba   =b->a;
141 
142   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
143   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
144   MatScalar *ap,*bap;
145 
146   /* for stash */
147   PetscInt  n_loc, *in_loc = NULL;
148   MatScalar *v_loc = NULL;
149 
150   PetscFunctionBegin;
151   if (!baij->donotstash) {
152     if (n > baij->n_loc) {
153       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
154       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
155       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
156       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
157 
158       baij->n_loc = n;
159     }
160     in_loc = baij->in_loc;
161     v_loc  = baij->v_loc;
162   }
163 
164   for (i=0; i<m; i++) {
165     if (im[i] < 0) continue;
166 #if defined(PETSC_USE_DEBUG)
167     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
168 #endif
169     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
170       row = im[i] - rstart_orig;              /* local row index */
171       for (j=0; j<n; j++) {
172         if (im[i]/bs > in[j]/bs) {
173           if (a->ignore_ltriangular) {
174             continue;    /* ignore lower triangular blocks */
175           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
176         }
177         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
178           col  = in[j] - cstart_orig;         /* local col index */
179           brow = row/bs; bcol = col/bs;
180           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
181           if (roworiented) value = v[i*n+j];
182           else             value = v[i+j*m];
183           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
184           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
185         } else if (in[j] < 0) continue;
186 #if defined(PETSC_USE_DEBUG)
187         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
188 #endif
189         else {  /* off-diag entry (B) */
190           if (mat->was_assembled) {
191             if (!baij->colmap) {
192               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
193             }
194 #if defined(PETSC_USE_CTABLE)
195             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
196             col  = col - 1;
197 #else
198             col = baij->colmap[in[j]/bs] - 1;
199 #endif
200             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
201               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
202               col  =  in[j];
203               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
204               B    = baij->B;
205               b    = (Mat_SeqBAIJ*)(B)->data;
206               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
207               ba   = b->a;
208             } else col += in[j]%bs;
209           } else col = in[j];
210           if (roworiented) value = v[i*n+j];
211           else             value = v[i+j*m];
212           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
213           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
214         }
215       }
216     } else {  /* off processor entry */
217       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
218       if (!baij->donotstash) {
219         mat->assembled = PETSC_FALSE;
220         n_loc          = 0;
221         for (j=0; j<n; j++) {
222           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
223           in_loc[n_loc] = in[j];
224           if (roworiented) {
225             v_loc[n_loc] = v[i*n+j];
226           } else {
227             v_loc[n_loc] = v[j*m+i];
228           }
229           n_loc++;
230         }
231         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
241 {
242   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
243   PetscErrorCode    ierr;
244   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
245   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
246   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
247   PetscBool         roworiented=a->roworiented;
248   const PetscScalar *value     = v;
249   MatScalar         *ap,*aa = a->a,*bap;
250 
251   PetscFunctionBegin;
252   if (col < row) {
253     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
254     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
255   }
256   rp   = aj + ai[row];
257   ap   = aa + bs2*ai[row];
258   rmax = imax[row];
259   nrow = ailen[row];
260   value = v;
261   low   = 0;
262   high  = nrow;
263 
264   while (high-low > 7) {
265     t = (low+high)/2;
266     if (rp[t] > col) high = t;
267     else             low  = t;
268   }
269   for (i=low; i<high; i++) {
270     if (rp[i] > col) break;
271     if (rp[i] == col) {
272       bap = ap +  bs2*i;
273       if (roworiented) {
274         if (is == ADD_VALUES) {
275           for (ii=0; ii<bs; ii++) {
276             for (jj=ii; jj<bs2; jj+=bs) {
277               bap[jj] += *value++;
278             }
279           }
280         } else {
281           for (ii=0; ii<bs; ii++) {
282             for (jj=ii; jj<bs2; jj+=bs) {
283               bap[jj] = *value++;
284             }
285           }
286         }
287       } else {
288         if (is == ADD_VALUES) {
289           for (ii=0; ii<bs; ii++) {
290             for (jj=0; jj<bs; jj++) {
291               *bap++ += *value++;
292             }
293           }
294         } else {
295           for (ii=0; ii<bs; ii++) {
296             for (jj=0; jj<bs; jj++) {
297               *bap++  = *value++;
298             }
299           }
300         }
301       }
302       goto noinsert2;
303     }
304   }
305   if (nonew == 1) goto noinsert2;
306   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
307   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
308   N = nrow++ - 1; high++;
309   /* shift up all the later entries in this row */
310   for (ii=N; ii>=i; ii--) {
311     rp[ii+1] = rp[ii];
312     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
313   }
314   if (N >= i) {
315     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
316   }
317   rp[i] = col;
318   bap   = ap +  bs2*i;
319   if (roworiented) {
320     for (ii=0; ii<bs; ii++) {
321       for (jj=ii; jj<bs2; jj+=bs) {
322         bap[jj] = *value++;
323       }
324     }
325   } else {
326     for (ii=0; ii<bs; ii++) {
327       for (jj=0; jj<bs; jj++) {
328         *bap++ = *value++;
329       }
330     }
331   }
332   noinsert2:;
333   ailen[row] = nrow;
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
339 /*
340    This routine is exactly duplicated in mpibaij.c
341 */
342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
343 {
344   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
345   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
346   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
347   PetscErrorCode    ierr;
348   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
349   PetscBool         roworiented=a->roworiented;
350   const PetscScalar *value     = v;
351   MatScalar         *ap,*aa = a->a,*bap;
352 
353   PetscFunctionBegin;
354   rp   = aj + ai[row];
355   ap   = aa + bs2*ai[row];
356   rmax = imax[row];
357   nrow = ailen[row];
358   low  = 0;
359   high = nrow;
360   value = v;
361   while (high-low > 7) {
362     t = (low+high)/2;
363     if (rp[t] > col) high = t;
364     else             low  = t;
365   }
366   for (i=low; i<high; i++) {
367     if (rp[i] > col) break;
368     if (rp[i] == col) {
369       bap = ap +  bs2*i;
370       if (roworiented) {
371         if (is == ADD_VALUES) {
372           for (ii=0; ii<bs; ii++) {
373             for (jj=ii; jj<bs2; jj+=bs) {
374               bap[jj] += *value++;
375             }
376           }
377         } else {
378           for (ii=0; ii<bs; ii++) {
379             for (jj=ii; jj<bs2; jj+=bs) {
380               bap[jj] = *value++;
381             }
382           }
383         }
384       } else {
385         if (is == ADD_VALUES) {
386           for (ii=0; ii<bs; ii++,value+=bs) {
387             for (jj=0; jj<bs; jj++) {
388               bap[jj] += value[jj];
389             }
390             bap += bs;
391           }
392         } else {
393           for (ii=0; ii<bs; ii++,value+=bs) {
394             for (jj=0; jj<bs; jj++) {
395               bap[jj]  = value[jj];
396             }
397             bap += bs;
398           }
399         }
400       }
401       goto noinsert2;
402     }
403   }
404   if (nonew == 1) goto noinsert2;
405   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
406   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
407   N = nrow++ - 1; high++;
408   /* shift up all the later entries in this row */
409   for (ii=N; ii>=i; ii--) {
410     rp[ii+1] = rp[ii];
411     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
412   }
413   if (N >= i) {
414     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
415   }
416   rp[i] = col;
417   bap   = ap +  bs2*i;
418   if (roworiented) {
419     for (ii=0; ii<bs; ii++) {
420       for (jj=ii; jj<bs2; jj+=bs) {
421         bap[jj] = *value++;
422       }
423     }
424   } else {
425     for (ii=0; ii<bs; ii++) {
426       for (jj=0; jj<bs; jj++) {
427         *bap++ = *value++;
428       }
429     }
430   }
431   noinsert2:;
432   ailen[row] = nrow;
433   PetscFunctionReturn(0);
434 }
435 
436 #undef __FUNCT__
437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
438 /*
439     This routine could be optimized by removing the need for the block copy below and passing stride information
440   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
441 */
442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
443 {
444   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
445   const MatScalar *value;
446   MatScalar       *barray     =baij->barray;
447   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
448   PetscErrorCode  ierr;
449   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
450   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
451   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
452 
453   PetscFunctionBegin;
454   if (!barray) {
455     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
456     baij->barray = barray;
457   }
458 
459   if (roworiented) {
460     stepval = (n-1)*bs;
461   } else {
462     stepval = (m-1)*bs;
463   }
464   for (i=0; i<m; i++) {
465     if (im[i] < 0) continue;
466 #if defined(PETSC_USE_DEBUG)
467     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
468 #endif
469     if (im[i] >= rstart && im[i] < rend) {
470       row = im[i] - rstart;
471       for (j=0; j<n; j++) {
472         if (im[i] > in[j]) {
473           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
474           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
475         }
476         /* If NumCol = 1 then a copy is not required */
477         if ((roworiented) && (n == 1)) {
478           barray = (MatScalar*) v + i*bs2;
479         } else if ((!roworiented) && (m == 1)) {
480           barray = (MatScalar*) v + j*bs2;
481         } else { /* Here a copy is required */
482           if (roworiented) {
483             value = v + i*(stepval+bs)*bs + j*bs;
484           } else {
485             value = v + j*(stepval+bs)*bs + i*bs;
486           }
487           for (ii=0; ii<bs; ii++,value+=stepval) {
488             for (jj=0; jj<bs; jj++) {
489               *barray++ = *value++;
490             }
491           }
492           barray -=bs2;
493         }
494 
495         if (in[j] >= cstart && in[j] < cend) {
496           col  = in[j] - cstart;
497           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
498         } else if (in[j] < 0) continue;
499 #if defined(PETSC_USE_DEBUG)
500         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
501 #endif
502         else {
503           if (mat->was_assembled) {
504             if (!baij->colmap) {
505               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
506             }
507 
508 #if defined(PETSC_USE_DEBUG)
509 #if defined(PETSC_USE_CTABLE)
510             { PetscInt data;
511               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
512               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
513             }
514 #else
515             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
516 #endif
517 #endif
518 #if defined(PETSC_USE_CTABLE)
519             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520             col  = (col - 1)/bs;
521 #else
522             col = (baij->colmap[in[j]] - 1)/bs;
523 #endif
524             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
526               col  = in[j];
527             }
528           } else col = in[j];
529           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
530         }
531       }
532     } else {
533       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
534       if (!baij->donotstash) {
535         if (roworiented) {
536           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
537         } else {
538           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
539         }
540       }
541     }
542   }
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_MPISBAIJ"
548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
549 {
550   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
551   PetscErrorCode ierr;
552   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
553   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
554 
555   PetscFunctionBegin;
556   for (i=0; i<m; i++) {
557     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
558     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
559     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
560       row = idxm[i] - bsrstart;
561       for (j=0; j<n; j++) {
562         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
563         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
564         if (idxn[j] >= bscstart && idxn[j] < bscend) {
565           col  = idxn[j] - bscstart;
566           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567         } else {
568           if (!baij->colmap) {
569             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
570           }
571 #if defined(PETSC_USE_CTABLE)
572           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
573           data--;
574 #else
575           data = baij->colmap[idxn[j]/bs]-1;
576 #endif
577           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
578           else {
579             col  = data + idxn[j]%bs;
580             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581           }
582         }
583       }
584     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
585   }
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatNorm_MPISBAIJ"
591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
592 {
593   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
594   PetscErrorCode ierr;
595   PetscReal      sum[2],*lnorm2;
596 
597   PetscFunctionBegin;
598   if (baij->size == 1) {
599     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
600   } else {
601     if (type == NORM_FROBENIUS) {
602       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
603       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
605       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
606       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
607       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
608       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
609       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
610     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
611       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
612       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
613       PetscReal    *rsum,*rsum2,vabs;
614       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
615       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
616       MatScalar    *v;
617 
618       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
619       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
620       /* Amat */
621       v = amat->a; jj = amat->j;
622       for (brow=0; brow<mbs; brow++) {
623         grow = bs*(rstart + brow);
624         nz   = amat->i[brow+1] - amat->i[brow];
625         for (bcol=0; bcol<nz; bcol++) {
626           gcol = bs*(rstart + *jj); jj++;
627           for (col=0; col<bs; col++) {
628             for (row=0; row<bs; row++) {
629               vabs            = PetscAbsScalar(*v); v++;
630               rsum[gcol+col] += vabs;
631               /* non-diagonal block */
632               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
633             }
634           }
635         }
636       }
637       /* Bmat */
638       v = bmat->a; jj = bmat->j;
639       for (brow=0; brow<mbs; brow++) {
640         grow = bs*(rstart + brow);
641         nz = bmat->i[brow+1] - bmat->i[brow];
642         for (bcol=0; bcol<nz; bcol++) {
643           gcol = bs*garray[*jj]; jj++;
644           for (col=0; col<bs; col++) {
645             for (row=0; row<bs; row++) {
646               vabs            = PetscAbsScalar(*v); v++;
647               rsum[gcol+col] += vabs;
648               rsum[grow+row] += vabs;
649             }
650           }
651         }
652       }
653       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
654       *norm = 0.0;
655       for (col=0; col<mat->cmap->N; col++) {
656         if (rsum2[col] > *norm) *norm = rsum2[col];
657       }
658       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
659     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
660   }
661   PetscFunctionReturn(0);
662 }
663 
664 #undef __FUNCT__
665 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
666 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
667 {
668   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
669   PetscErrorCode ierr;
670   PetscInt       nstash,reallocs;
671 
672   PetscFunctionBegin;
673   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
674 
675   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
676   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
677   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
678   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
679   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
680   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 
684 #undef __FUNCT__
685 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
686 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
687 {
688   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
689   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
690   PetscErrorCode ierr;
691   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscMPIInt    n;
695   PetscBool      r1,r2,r3;
696   MatScalar      *val;
697 
698   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
699   PetscFunctionBegin;
700   if (!baij->donotstash &&  !mat->nooffprocentries) {
701     while (1) {
702       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
703       if (!flg) break;
704 
705       for (i=0; i<n;) {
706         /* Now identify the consecutive vals belonging to the same row */
707         for (j=i,rstart=row[j]; j<n; j++) {
708           if (row[j] != rstart) break;
709         }
710         if (j < n) ncols = j-i;
711         else       ncols = n-i;
712         /* Now assemble all these values with a single function call */
713         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
714         i    = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718     /* Now process the block-stash. Since the values are stashed column-oriented,
719        set the roworiented flag to column oriented, and after MatSetValues()
720        restore the original flags */
721     r1 = baij->roworiented;
722     r2 = a->roworiented;
723     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
724 
725     baij->roworiented = PETSC_FALSE;
726     a->roworiented    = PETSC_FALSE;
727 
728     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
729     while (1) {
730       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
731       if (!flg) break;
732 
733       for (i=0; i<n;) {
734         /* Now identify the consecutive vals belonging to the same row */
735         for (j=i,rstart=row[j]; j<n; j++) {
736           if (row[j] != rstart) break;
737         }
738         if (j < n) ncols = j-i;
739         else       ncols = n-i;
740         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
741         i    = j;
742       }
743     }
744     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
745 
746     baij->roworiented = r1;
747     a->roworiented    = r2;
748 
749     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
750   }
751 
752   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
753   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
754 
755   /* determine if any processor has disassembled, if so we must
756      also disassemble ourselfs, in order that we may reassemble. */
757   /*
758      if nonzero structure of submatrix B cannot change then we know that
759      no processor disassembled thus we can skip this stuff
760   */
761   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
762     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
763     if (mat->was_assembled && !other_disassembled) {
764       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
765     }
766   }
767 
768   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
769     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
770   }
771   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
772   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
773 
774   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
775 
776   baij->rowvalues = 0;
777 
778   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
779   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
780     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
781     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
782   }
783   PetscFunctionReturn(0);
784 }
785 
786 extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer);
787 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
788 #include <petscdraw.h>
789 #undef __FUNCT__
790 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
791 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
792 {
793   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
794   PetscErrorCode    ierr;
795   PetscInt          bs   = mat->rmap->bs;
796   PetscMPIInt       rank = baij->rank;
797   PetscBool         iascii,isdraw;
798   PetscViewer       sviewer;
799   PetscViewerFormat format;
800 
801   PetscFunctionBegin;
802   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
803   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
804   if (iascii) {
805     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
806     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
807       MatInfo info;
808       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
809       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
810       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
811       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
812       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
813       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
814       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
815       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
816       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
817       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
818       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
819       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
820       PetscFunctionReturn(0);
821     } else if (format == PETSC_VIEWER_ASCII_INFO) {
822       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
823       PetscFunctionReturn(0);
824     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
825       PetscFunctionReturn(0);
826     }
827   }
828 
829   if (isdraw) {
830     PetscDraw draw;
831     PetscBool isnull;
832     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
833     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
834     if (isnull) PetscFunctionReturn(0);
835   }
836 
837   {
838     /* assemble the entire matrix onto first processor. */
839     Mat          A;
840     Mat_SeqSBAIJ *Aloc;
841     Mat_SeqBAIJ  *Bloc;
842     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
843     MatScalar    *a;
844     const char   *matname;
845 
846     /* Should this be the same type as mat? */
847     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
848     if (!rank) {
849       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
850     } else {
851       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
852     }
853     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
854     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
855     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
856     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
857 
858     /* copy over the A part */
859     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
860     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
861     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
862 
863     for (i=0; i<mbs; i++) {
864       rvals[0] = bs*(baij->rstartbs + i);
865       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
866       for (j=ai[i]; j<ai[i+1]; j++) {
867         col = (baij->cstartbs+aj[j])*bs;
868         for (k=0; k<bs; k++) {
869           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
870           col++;
871           a += bs;
872         }
873       }
874     }
875     /* copy over the B part */
876     Bloc = (Mat_SeqBAIJ*)baij->B->data;
877     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
878     for (i=0; i<mbs; i++) {
879 
880       rvals[0] = bs*(baij->rstartbs + i);
881       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
882       for (j=ai[i]; j<ai[i+1]; j++) {
883         col = baij->garray[aj[j]]*bs;
884         for (k=0; k<bs; k++) {
885           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
886           col++;
887           a += bs;
888         }
889       }
890     }
891     ierr = PetscFree(rvals);CHKERRQ(ierr);
892     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
893     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
894     /*
895        Everyone has to call to draw the matrix since the graphics waits are
896        synchronized across all processors that share the PetscDraw object
897     */
898     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
899     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
900     if (!rank) {
901       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
902       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
903     }
904     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
905     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
906     ierr = MatDestroy(&A);CHKERRQ(ierr);
907   }
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPISBAIJ"
913 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
914 {
915   PetscErrorCode ierr;
916   PetscBool      iascii,isdraw,issocket,isbinary;
917 
918   PetscFunctionBegin;
919   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
920   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
921   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
922   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
923   if (iascii || isdraw || issocket || isbinary) {
924     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
925   }
926   PetscFunctionReturn(0);
927 }
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "MatDestroy_MPISBAIJ"
931 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
932 {
933   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
934   PetscErrorCode ierr;
935 
936   PetscFunctionBegin;
937 #if defined(PETSC_USE_LOG)
938   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
939 #endif
940   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
941   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
942   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
943   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
944 #if defined(PETSC_USE_CTABLE)
945   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
946 #else
947   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
948 #endif
949   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
950   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
951   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
952   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
953   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
954   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
955   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
956   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
957   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
958   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
959   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
960   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
961   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
962   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
963   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
964 #if defined(PETSC_USE_REAL_MAT_SINGLE)
965   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
966 #endif
967   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
968   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
969   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
970   ierr = PetscFree(mat->data);CHKERRQ(ierr);
971 
972   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
973   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
974   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
975   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
976   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
977   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
978 #if defined(PETSC_HAVE_ELEMENTAL)
979   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
980 #endif
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
986 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
987 {
988   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
989   PetscErrorCode    ierr;
990   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
991   PetscScalar       *from;
992   const PetscScalar *x;
993 
994   PetscFunctionBegin;
995   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
996   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
997 
998   /* diagonal part */
999   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1000   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1001 
1002   /* subdiagonal part */
1003   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1004 
1005   /* copy x into the vec slvec0 */
1006   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1007   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1008 
1009   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1010   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1011   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1012 
1013   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1015   /* supperdiagonal part */
1016   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatMult_MPISBAIJ"
1022 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1023 {
1024   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1025   PetscErrorCode    ierr;
1026   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1027   PetscScalar       *from;
1028   const PetscScalar *x;
1029 
1030   PetscFunctionBegin;
1031   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1032   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1033 
1034   /* diagonal part */
1035   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1036   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1037 
1038   /* subdiagonal part */
1039   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1040 
1041   /* copy x into the vec slvec0 */
1042   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1043   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1044 
1045   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1046   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1047   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1048 
1049   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1050   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1051   /* supperdiagonal part */
1052   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 #undef __FUNCT__
1057 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1058 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1059 {
1060   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1061   PetscErrorCode ierr;
1062   PetscInt       nt;
1063 
1064   PetscFunctionBegin;
1065   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1066   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1067 
1068   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1069   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1070 
1071   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1072   /* do diagonal part */
1073   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1074   /* do supperdiagonal part */
1075   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1076   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1077   /* do subdiagonal part */
1078   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1079   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1080   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1086 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1087 {
1088   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1089   PetscErrorCode    ierr;
1090   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1091   PetscScalar       *from,zero=0.0;
1092   const PetscScalar *x;
1093 
1094   PetscFunctionBegin;
1095   /*
1096   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1097   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1098   */
1099   /* diagonal part */
1100   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1101   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1102 
1103   /* subdiagonal part */
1104   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1105 
1106   /* copy x into the vec slvec0 */
1107   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1108   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1109   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1110   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1111 
1112   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1113   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1114   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1115 
1116   /* supperdiagonal part */
1117   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1123 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1124 {
1125   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1126   PetscErrorCode ierr;
1127 
1128   PetscFunctionBegin;
1129   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1130   /* do diagonal part */
1131   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1132   /* do supperdiagonal part */
1133   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1134   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1135 
1136   /* do subdiagonal part */
1137   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1138   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1139   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 /*
1144   This only works correctly for square matrices where the subblock A->A is the
1145    diagonal block
1146 */
1147 #undef __FUNCT__
1148 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1149 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1150 {
1151   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1152   PetscErrorCode ierr;
1153 
1154   PetscFunctionBegin;
1155   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1156   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 #undef __FUNCT__
1161 #define __FUNCT__ "MatScale_MPISBAIJ"
1162 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1163 {
1164   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1165   PetscErrorCode ierr;
1166 
1167   PetscFunctionBegin;
1168   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1169   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1175 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1176 {
1177   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1178   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1179   PetscErrorCode ierr;
1180   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1181   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1182   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1183 
1184   PetscFunctionBegin;
1185   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1186   mat->getrowactive = PETSC_TRUE;
1187 
1188   if (!mat->rowvalues && (idx || v)) {
1189     /*
1190         allocate enough space to hold information from the longest row.
1191     */
1192     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1193     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1194     PetscInt     max = 1,mbs = mat->mbs,tmp;
1195     for (i=0; i<mbs; i++) {
1196       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1197       if (max < tmp) max = tmp;
1198     }
1199     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1200   }
1201 
1202   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1203   lrow = row - brstart;  /* local row index */
1204 
1205   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1206   if (!v)   {pvA = 0; pvB = 0;}
1207   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1208   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1209   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1210   nztot = nzA + nzB;
1211 
1212   cmap = mat->garray;
1213   if (v  || idx) {
1214     if (nztot) {
1215       /* Sort by increasing column numbers, assuming A and B already sorted */
1216       PetscInt imark = -1;
1217       if (v) {
1218         *v = v_p = mat->rowvalues;
1219         for (i=0; i<nzB; i++) {
1220           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1221           else break;
1222         }
1223         imark = i;
1224         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1225         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1226       }
1227       if (idx) {
1228         *idx = idx_p = mat->rowindices;
1229         if (imark > -1) {
1230           for (i=0; i<imark; i++) {
1231             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1232           }
1233         } else {
1234           for (i=0; i<nzB; i++) {
1235             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1236             else break;
1237           }
1238           imark = i;
1239         }
1240         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1241         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1242       }
1243     } else {
1244       if (idx) *idx = 0;
1245       if (v)   *v   = 0;
1246     }
1247   }
1248   *nz  = nztot;
1249   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1250   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1256 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1257 {
1258   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1259 
1260   PetscFunctionBegin;
1261   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1262   baij->getrowactive = PETSC_FALSE;
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1268 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1269 {
1270   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1271   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1272 
1273   PetscFunctionBegin;
1274   aA->getrow_utriangular = PETSC_TRUE;
1275   PetscFunctionReturn(0);
1276 }
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1279 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1280 {
1281   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1282   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1283 
1284   PetscFunctionBegin;
1285   aA->getrow_utriangular = PETSC_FALSE;
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 #undef __FUNCT__
1290 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1291 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1292 {
1293   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1294   PetscErrorCode ierr;
1295 
1296   PetscFunctionBegin;
1297   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1298   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1304 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1305 {
1306   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1307   PetscErrorCode ierr;
1308 
1309   PetscFunctionBegin;
1310   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1311   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
1316    Input: isrow       - distributed(parallel),
1317           iscol_local - locally owned (seq)
1318 */
1319 #undef __FUNCT__
1320 #define __FUNCT__ "ISEqual_private"
1321 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1322 {
1323   PetscErrorCode ierr;
1324   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1325   const PetscInt *ptr1,*ptr2;
1326 
1327   PetscFunctionBegin;
1328   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1329   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1330   if (sz1 > sz2) {
1331     *flg = PETSC_FALSE;
1332     PetscFunctionReturn(0);
1333   }
1334 
1335   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1336   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1337 
1338   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1339   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1340   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1341   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1342   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1343   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1344 
1345   nmatch=0;
1346   k     = 0;
1347   for (i=0; i<sz1; i++){
1348     for (j=k; j<sz2; j++){
1349       if (a1[i] == a2[j]) {
1350         k = j; nmatch++;
1351         break;
1352       }
1353     }
1354   }
1355   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1356   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1357   ierr = PetscFree(a1);CHKERRQ(ierr);
1358   ierr = PetscFree(a2);CHKERRQ(ierr);
1359   if (nmatch < sz1) {
1360     *flg = PETSC_FALSE;
1361   } else {
1362     *flg = PETSC_TRUE;
1363   }
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
1369 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1370 {
1371   PetscErrorCode ierr;
1372   IS             iscol_local;
1373   PetscInt       csize;
1374   PetscBool      isequal;
1375 
1376   PetscFunctionBegin;
1377   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1378   if (call == MAT_REUSE_MATRIX) {
1379     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1380     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1381   } else {
1382     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1383     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1384     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1385   }
1386 
1387   /* now call MatGetSubMatrix_MPIBAIJ() */
1388   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1389   if (call == MAT_INITIAL_MATRIX) {
1390     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1391     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1392   }
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1398 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1399 {
1400   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1401   PetscErrorCode ierr;
1402 
1403   PetscFunctionBegin;
1404   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1405   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1411 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1412 {
1413   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1414   Mat            A  = a->A,B = a->B;
1415   PetscErrorCode ierr;
1416   PetscReal      isend[5],irecv[5];
1417 
1418   PetscFunctionBegin;
1419   info->block_size = (PetscReal)matin->rmap->bs;
1420 
1421   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1422 
1423   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1424   isend[3] = info->memory;  isend[4] = info->mallocs;
1425 
1426   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1427 
1428   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1429   isend[3] += info->memory;  isend[4] += info->mallocs;
1430   if (flag == MAT_LOCAL) {
1431     info->nz_used      = isend[0];
1432     info->nz_allocated = isend[1];
1433     info->nz_unneeded  = isend[2];
1434     info->memory       = isend[3];
1435     info->mallocs      = isend[4];
1436   } else if (flag == MAT_GLOBAL_MAX) {
1437     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1438 
1439     info->nz_used      = irecv[0];
1440     info->nz_allocated = irecv[1];
1441     info->nz_unneeded  = irecv[2];
1442     info->memory       = irecv[3];
1443     info->mallocs      = irecv[4];
1444   } else if (flag == MAT_GLOBAL_SUM) {
1445     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1446 
1447     info->nz_used      = irecv[0];
1448     info->nz_allocated = irecv[1];
1449     info->nz_unneeded  = irecv[2];
1450     info->memory       = irecv[3];
1451     info->mallocs      = irecv[4];
1452   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1453   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1454   info->fill_ratio_needed = 0;
1455   info->factor_mallocs    = 0;
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1461 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1462 {
1463   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1464   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1465   PetscErrorCode ierr;
1466 
1467   PetscFunctionBegin;
1468   switch (op) {
1469   case MAT_NEW_NONZERO_LOCATIONS:
1470   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1471   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1472   case MAT_KEEP_NONZERO_PATTERN:
1473   case MAT_NEW_NONZERO_LOCATION_ERR:
1474     MatCheckPreallocated(A,1);
1475     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1476     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1477     break;
1478   case MAT_ROW_ORIENTED:
1479     MatCheckPreallocated(A,1);
1480     a->roworiented = flg;
1481 
1482     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1483     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1484     break;
1485   case MAT_NEW_DIAGONALS:
1486     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1487     break;
1488   case MAT_IGNORE_OFF_PROC_ENTRIES:
1489     a->donotstash = flg;
1490     break;
1491   case MAT_USE_HASH_TABLE:
1492     a->ht_flag = flg;
1493     break;
1494   case MAT_HERMITIAN:
1495     MatCheckPreallocated(A,1);
1496     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1497     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1498 
1499     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1500     break;
1501   case MAT_SPD:
1502     A->spd_set = PETSC_TRUE;
1503     A->spd     = flg;
1504     if (flg) {
1505       A->symmetric                  = PETSC_TRUE;
1506       A->structurally_symmetric     = PETSC_TRUE;
1507       A->symmetric_set              = PETSC_TRUE;
1508       A->structurally_symmetric_set = PETSC_TRUE;
1509     }
1510     break;
1511   case MAT_SYMMETRIC:
1512     MatCheckPreallocated(A,1);
1513     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1514     break;
1515   case MAT_STRUCTURALLY_SYMMETRIC:
1516     MatCheckPreallocated(A,1);
1517     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1518     break;
1519   case MAT_SYMMETRY_ETERNAL:
1520     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1521     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1522     break;
1523   case MAT_IGNORE_LOWER_TRIANGULAR:
1524     aA->ignore_ltriangular = flg;
1525     break;
1526   case MAT_ERROR_LOWER_TRIANGULAR:
1527     aA->ignore_ltriangular = flg;
1528     break;
1529   case MAT_GETROW_UPPERTRIANGULAR:
1530     aA->getrow_utriangular = flg;
1531     break;
1532   default:
1533     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1534   }
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1540 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1541 {
1542   PetscErrorCode ierr;
1543 
1544   PetscFunctionBegin;
1545   if (MAT_INITIAL_MATRIX || *B != A) {
1546     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1547   }
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1553 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1554 {
1555   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1556   Mat            a     = baij->A, b=baij->B;
1557   PetscErrorCode ierr;
1558   PetscInt       nv,m,n;
1559   PetscBool      flg;
1560 
1561   PetscFunctionBegin;
1562   if (ll != rr) {
1563     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1564     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1565   }
1566   if (!ll) PetscFunctionReturn(0);
1567 
1568   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1569   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1570 
1571   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1572   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1573 
1574   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1575 
1576   /* left diagonalscale the off-diagonal part */
1577   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1578 
1579   /* scale the diagonal part */
1580   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1581 
1582   /* right diagonalscale the off-diagonal part */
1583   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1584   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1590 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1591 {
1592   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1593   PetscErrorCode ierr;
1594 
1595   PetscFunctionBegin;
1596   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatEqual_MPISBAIJ"
1604 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1605 {
1606   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1607   Mat            a,b,c,d;
1608   PetscBool      flg;
1609   PetscErrorCode ierr;
1610 
1611   PetscFunctionBegin;
1612   a = matA->A; b = matA->B;
1613   c = matB->A; d = matB->B;
1614 
1615   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1616   if (flg) {
1617     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1618   }
1619   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 #undef __FUNCT__
1624 #define __FUNCT__ "MatCopy_MPISBAIJ"
1625 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1626 {
1627   PetscErrorCode ierr;
1628   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1629   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1630 
1631   PetscFunctionBegin;
1632   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1633   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1634     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1635     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1636     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1637   } else {
1638     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1639     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1640   }
1641   PetscFunctionReturn(0);
1642 }
1643 
1644 #undef __FUNCT__
1645 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1646 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1647 {
1648   PetscErrorCode ierr;
1649 
1650   PetscFunctionBegin;
1651   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1652   PetscFunctionReturn(0);
1653 }
1654 
1655 #undef __FUNCT__
1656 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1657 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1658 {
1659   PetscErrorCode ierr;
1660   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1661   PetscBLASInt   bnz,one=1;
1662   Mat_SeqSBAIJ   *xa,*ya;
1663   Mat_SeqBAIJ    *xb,*yb;
1664 
1665   PetscFunctionBegin;
1666   if (str == SAME_NONZERO_PATTERN) {
1667     PetscScalar alpha = a;
1668     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1669     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1670     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1671     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1672     xb   = (Mat_SeqBAIJ*)xx->B->data;
1673     yb   = (Mat_SeqBAIJ*)yy->B->data;
1674     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1675     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1676     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1677   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1678     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1679     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1680     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1681   } else {
1682     Mat      B;
1683     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1684     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1685     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1686     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1687     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1688     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1689     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1690     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1691     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1692     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1693     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1694     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1695     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1696     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1697     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1698     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1699     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1700     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1701     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1702     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1703   }
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1709 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1710 {
1711   PetscErrorCode ierr;
1712   PetscInt       i;
1713   PetscBool      flg;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1717   for (i=0; i<n; i++) {
1718     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1719     if (!flg) {
1720       Mat Bseq = *B[i];
1721       /* Bseq is non-symmetric SBAIJ matrix, only used internally by PETSc.
1722          Zero some ops' to avoid invalid usse */
1723       ierr = MatSetOption(Bseq,MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1724       Bseq->ops->mult                   = 0;
1725       Bseq->ops->multadd                = 0;
1726       Bseq->ops->multtranspose          = 0;
1727       Bseq->ops->multtransposeadd       = 0;
1728       Bseq->ops->lufactor               = 0;
1729       Bseq->ops->choleskyfactor         = 0;
1730       Bseq->ops->lufactorsymbolic       = 0;
1731       Bseq->ops->choleskyfactorsymbolic = 0;
1732       Bseq->ops->getinertia             = 0;
1733     }
1734   }
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatShift_MPISBAIJ"
1740 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1741 {
1742   PetscErrorCode ierr;
1743   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1744   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1745 
1746   PetscFunctionBegin;
1747   if (!Y->preallocated) {
1748     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1749   } else if (!aij->nz) {
1750     PetscInt nonew = aij->nonew;
1751     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1752     aij->nonew = nonew;
1753   }
1754   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1755   PetscFunctionReturn(0);
1756 }
1757 
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatMissingDiagonal_MPISBAIJ"
1760 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1761 {
1762   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1763   PetscErrorCode ierr;
1764 
1765   PetscFunctionBegin;
1766   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1767   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1768   if (d) {
1769     PetscInt rstart;
1770     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1771     *d += rstart/A->rmap->bs;
1772 
1773   }
1774   PetscFunctionReturn(0);
1775 }
1776 
1777 
1778 /* -------------------------------------------------------------------*/
1779 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1780                                        MatGetRow_MPISBAIJ,
1781                                        MatRestoreRow_MPISBAIJ,
1782                                        MatMult_MPISBAIJ,
1783                                /*  4*/ MatMultAdd_MPISBAIJ,
1784                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1785                                        MatMultAdd_MPISBAIJ,
1786                                        0,
1787                                        0,
1788                                        0,
1789                                /* 10*/ 0,
1790                                        0,
1791                                        0,
1792                                        MatSOR_MPISBAIJ,
1793                                        MatTranspose_MPISBAIJ,
1794                                /* 15*/ MatGetInfo_MPISBAIJ,
1795                                        MatEqual_MPISBAIJ,
1796                                        MatGetDiagonal_MPISBAIJ,
1797                                        MatDiagonalScale_MPISBAIJ,
1798                                        MatNorm_MPISBAIJ,
1799                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1800                                        MatAssemblyEnd_MPISBAIJ,
1801                                        MatSetOption_MPISBAIJ,
1802                                        MatZeroEntries_MPISBAIJ,
1803                                /* 24*/ 0,
1804                                        0,
1805                                        0,
1806                                        0,
1807                                        0,
1808                                /* 29*/ MatSetUp_MPISBAIJ,
1809                                        0,
1810                                        0,
1811                                        0,
1812                                        0,
1813                                /* 34*/ MatDuplicate_MPISBAIJ,
1814                                        0,
1815                                        0,
1816                                        0,
1817                                        0,
1818                                /* 39*/ MatAXPY_MPISBAIJ,
1819                                        MatGetSubMatrices_MPISBAIJ,
1820                                        MatIncreaseOverlap_MPISBAIJ,
1821                                        MatGetValues_MPISBAIJ,
1822                                        MatCopy_MPISBAIJ,
1823                                /* 44*/ 0,
1824                                        MatScale_MPISBAIJ,
1825                                        MatShift_MPISBAIJ,
1826                                        0,
1827                                        0,
1828                                /* 49*/ 0,
1829                                        0,
1830                                        0,
1831                                        0,
1832                                        0,
1833                                /* 54*/ 0,
1834                                        0,
1835                                        MatSetUnfactored_MPISBAIJ,
1836                                        0,
1837                                        MatSetValuesBlocked_MPISBAIJ,
1838                                /* 59*/ MatGetSubMatrix_MPISBAIJ,
1839                                        0,
1840                                        0,
1841                                        0,
1842                                        0,
1843                                /* 64*/ 0,
1844                                        0,
1845                                        0,
1846                                        0,
1847                                        0,
1848                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1849                                        0,
1850                                        0,
1851                                        0,
1852                                        0,
1853                                /* 74*/ 0,
1854                                        0,
1855                                        0,
1856                                        0,
1857                                        0,
1858                                /* 79*/ 0,
1859                                        0,
1860                                        0,
1861                                        0,
1862                                        MatLoad_MPISBAIJ,
1863                                /* 84*/ 0,
1864                                        0,
1865                                        0,
1866                                        0,
1867                                        0,
1868                                /* 89*/ 0,
1869                                        0,
1870                                        0,
1871                                        0,
1872                                        0,
1873                                /* 94*/ 0,
1874                                        0,
1875                                        0,
1876                                        0,
1877                                        0,
1878                                /* 99*/ 0,
1879                                        0,
1880                                        0,
1881                                        0,
1882                                        0,
1883                                /*104*/ 0,
1884                                        MatRealPart_MPISBAIJ,
1885                                        MatImaginaryPart_MPISBAIJ,
1886                                        MatGetRowUpperTriangular_MPISBAIJ,
1887                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1888                                /*109*/ 0,
1889                                        0,
1890                                        0,
1891                                        0,
1892                                        MatMissingDiagonal_MPISBAIJ,
1893                                /*114*/ 0,
1894                                        0,
1895                                        0,
1896                                        0,
1897                                        0,
1898                                /*119*/ 0,
1899                                        0,
1900                                        0,
1901                                        0,
1902                                        0,
1903                                /*124*/ 0,
1904                                        0,
1905                                        0,
1906                                        0,
1907                                        0,
1908                                /*129*/ 0,
1909                                        0,
1910                                        0,
1911                                        0,
1912                                        0,
1913                                /*134*/ 0,
1914                                        0,
1915                                        0,
1916                                        0,
1917                                        0,
1918                                /*139*/ 0,
1919                                        0,
1920                                        0,
1921                                        0,
1922                                        0,
1923                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1924 };
1925 
1926 #undef __FUNCT__
1927 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1928 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1929 {
1930   PetscFunctionBegin;
1931   *a = ((Mat_MPISBAIJ*)A->data)->A;
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1937 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1938 {
1939   Mat_MPISBAIJ   *b;
1940   PetscErrorCode ierr;
1941   PetscInt       i,mbs,Mbs;
1942 
1943   PetscFunctionBegin;
1944   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1945   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1946   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1947   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1948 
1949   b   = (Mat_MPISBAIJ*)B->data;
1950   mbs = B->rmap->n/bs;
1951   Mbs = B->rmap->N/bs;
1952   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1953 
1954   B->rmap->bs = bs;
1955   b->bs2      = bs*bs;
1956   b->mbs      = mbs;
1957   b->Mbs      = Mbs;
1958   b->nbs      = B->cmap->n/bs;
1959   b->Nbs      = B->cmap->N/bs;
1960 
1961   for (i=0; i<=b->size; i++) {
1962     b->rangebs[i] = B->rmap->range[i]/bs;
1963   }
1964   b->rstartbs = B->rmap->rstart/bs;
1965   b->rendbs   = B->rmap->rend/bs;
1966 
1967   b->cstartbs = B->cmap->rstart/bs;
1968   b->cendbs   = B->cmap->rend/bs;
1969 
1970   if (!B->preallocated) {
1971     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1972     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1973     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1974     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1975     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1976     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1977     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1978     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1979     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1980   }
1981 
1982   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1983   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1984 
1985   B->preallocated = PETSC_TRUE;
1986   PetscFunctionReturn(0);
1987 }
1988 
1989 #undef __FUNCT__
1990 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1991 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1992 {
1993   PetscInt       m,rstart,cstart,cend;
1994   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1995   const PetscInt *JJ    =0;
1996   PetscScalar    *values=0;
1997   PetscErrorCode ierr;
1998 
1999   PetscFunctionBegin;
2000   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2001   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2002   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2003   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2004   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2005   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2006   m      = B->rmap->n/bs;
2007   rstart = B->rmap->rstart/bs;
2008   cstart = B->cmap->rstart/bs;
2009   cend   = B->cmap->rend/bs;
2010 
2011   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2012   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2013   for (i=0; i<m; i++) {
2014     nz = ii[i+1] - ii[i];
2015     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2016     nz_max = PetscMax(nz_max,nz);
2017     JJ     = jj + ii[i];
2018     for (j=0; j<nz; j++) {
2019       if (*JJ >= cstart) break;
2020       JJ++;
2021     }
2022     d = 0;
2023     for (; j<nz; j++) {
2024       if (*JJ++ >= cend) break;
2025       d++;
2026     }
2027     d_nnz[i] = d;
2028     o_nnz[i] = nz - d;
2029   }
2030   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2031   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2032 
2033   values = (PetscScalar*)V;
2034   if (!values) {
2035     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2036     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2037   }
2038   for (i=0; i<m; i++) {
2039     PetscInt          row    = i + rstart;
2040     PetscInt          ncols  = ii[i+1] - ii[i];
2041     const PetscInt    *icols = jj + ii[i];
2042     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2043     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2044   }
2045 
2046   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2047   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2048   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2049   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 /*MC
2054    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2055    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2056    the matrix is stored.
2057 
2058   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2059   can call MatSetOption(Mat, MAT_HERMITIAN);
2060 
2061    Options Database Keys:
2062 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2063 
2064   Level: beginner
2065 
2066 .seealso: MatCreateMPISBAIJ
2067 M*/
2068 
2069 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2070 
2071 #undef __FUNCT__
2072 #define __FUNCT__ "MatCreate_MPISBAIJ"
2073 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2074 {
2075   Mat_MPISBAIJ   *b;
2076   PetscErrorCode ierr;
2077   PetscBool      flg = PETSC_FALSE;
2078 
2079   PetscFunctionBegin;
2080   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2081   B->data = (void*)b;
2082   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2083 
2084   B->ops->destroy = MatDestroy_MPISBAIJ;
2085   B->ops->view    = MatView_MPISBAIJ;
2086   B->assembled    = PETSC_FALSE;
2087   B->insertmode   = NOT_SET_VALUES;
2088 
2089   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2090   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2091 
2092   /* build local table of row and column ownerships */
2093   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2094 
2095   /* build cache for off array entries formed */
2096   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2097 
2098   b->donotstash  = PETSC_FALSE;
2099   b->colmap      = NULL;
2100   b->garray      = NULL;
2101   b->roworiented = PETSC_TRUE;
2102 
2103   /* stuff used in block assembly */
2104   b->barray = 0;
2105 
2106   /* stuff used for matrix vector multiply */
2107   b->lvec    = 0;
2108   b->Mvctx   = 0;
2109   b->slvec0  = 0;
2110   b->slvec0b = 0;
2111   b->slvec1  = 0;
2112   b->slvec1a = 0;
2113   b->slvec1b = 0;
2114   b->sMvctx  = 0;
2115 
2116   /* stuff for MatGetRow() */
2117   b->rowindices   = 0;
2118   b->rowvalues    = 0;
2119   b->getrowactive = PETSC_FALSE;
2120 
2121   /* hash table stuff */
2122   b->ht           = 0;
2123   b->hd           = 0;
2124   b->ht_size      = 0;
2125   b->ht_flag      = PETSC_FALSE;
2126   b->ht_fact      = 0;
2127   b->ht_total_ct  = 0;
2128   b->ht_insert_ct = 0;
2129 
2130   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
2131   b->ijonly = PETSC_FALSE;
2132 
2133   b->in_loc = 0;
2134   b->v_loc  = 0;
2135   b->n_loc  = 0;
2136 
2137   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2138   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2139   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
2140   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2141   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2142   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
2143 #if defined(PETSC_HAVE_ELEMENTAL)
2144   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2145 #endif
2146 
2147   B->symmetric                  = PETSC_TRUE;
2148   B->structurally_symmetric     = PETSC_TRUE;
2149   B->symmetric_set              = PETSC_TRUE;
2150   B->structurally_symmetric_set = PETSC_TRUE;
2151 
2152   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2153   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2154   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2155   if (flg) {
2156     PetscReal fact = 1.39;
2157     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2158     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2159     if (fact <= 1.0) fact = 1.39;
2160     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2161     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2162   }
2163   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 /*MC
2168    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2169 
2170    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2171    and MATMPISBAIJ otherwise.
2172 
2173    Options Database Keys:
2174 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2175 
2176   Level: beginner
2177 
2178 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2179 M*/
2180 
2181 #undef __FUNCT__
2182 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
2183 /*@C
2184    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2185    the user should preallocate the matrix storage by setting the parameters
2186    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2187    performance can be increased by more than a factor of 50.
2188 
2189    Collective on Mat
2190 
2191    Input Parameters:
2192 +  B - the matrix
2193 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2194           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2195 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2196            submatrix  (same for all local rows)
2197 .  d_nnz - array containing the number of block nonzeros in the various block rows
2198            in the upper triangular and diagonal part of the in diagonal portion of the local
2199            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2200            for the diagonal entry and set a value even if it is zero.
2201 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2202            submatrix (same for all local rows).
2203 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2204            off-diagonal portion of the local submatrix that is right of the diagonal
2205            (possibly different for each block row) or NULL.
2206 
2207 
2208    Options Database Keys:
2209 .   -mat_no_unroll - uses code that does not unroll the loops in the
2210                      block calculations (much slower)
2211 .   -mat_block_size - size of the blocks to use
2212 
2213    Notes:
2214 
2215    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2216    than it must be used on all processors that share the object for that argument.
2217 
2218    If the *_nnz parameter is given then the *_nz parameter is ignored
2219 
2220    Storage Information:
2221    For a square global matrix we define each processor's diagonal portion
2222    to be its local rows and the corresponding columns (a square submatrix);
2223    each processor's off-diagonal portion encompasses the remainder of the
2224    local matrix (a rectangular submatrix).
2225 
2226    The user can specify preallocated storage for the diagonal part of
2227    the local submatrix with either d_nz or d_nnz (not both).  Set
2228    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2229    memory allocation.  Likewise, specify preallocated storage for the
2230    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2231 
2232    You can call MatGetInfo() to get information on how effective the preallocation was;
2233    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2234    You can also run with the option -info and look for messages with the string
2235    malloc in them to see if additional memory allocation was needed.
2236 
2237    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2238    the figure below we depict these three local rows and all columns (0-11).
2239 
2240 .vb
2241            0 1 2 3 4 5 6 7 8 9 10 11
2242           --------------------------
2243    row 3  |. . . d d d o o o o  o  o
2244    row 4  |. . . d d d o o o o  o  o
2245    row 5  |. . . d d d o o o o  o  o
2246           --------------------------
2247 .ve
2248 
2249    Thus, any entries in the d locations are stored in the d (diagonal)
2250    submatrix, and any entries in the o locations are stored in the
2251    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2252    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2253 
2254    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2255    plus the diagonal part of the d matrix,
2256    and o_nz should indicate the number of block nonzeros per row in the o matrix
2257 
2258    In general, for PDE problems in which most nonzeros are near the diagonal,
2259    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2260    or you will get TERRIBLE performance; see the users' manual chapter on
2261    matrices.
2262 
2263    Level: intermediate
2264 
2265 .keywords: matrix, block, aij, compressed row, sparse, parallel
2266 
2267 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2268 @*/
2269 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2270 {
2271   PetscErrorCode ierr;
2272 
2273   PetscFunctionBegin;
2274   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2275   PetscValidType(B,1);
2276   PetscValidLogicalCollectiveInt(B,bs,2);
2277   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatCreateSBAIJ"
2283 /*@C
2284    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2285    (block compressed row).  For good matrix assembly performance
2286    the user should preallocate the matrix storage by setting the parameters
2287    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2288    performance can be increased by more than a factor of 50.
2289 
2290    Collective on MPI_Comm
2291 
2292    Input Parameters:
2293 +  comm - MPI communicator
2294 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2295           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2296 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2297            This value should be the same as the local size used in creating the
2298            y vector for the matrix-vector product y = Ax.
2299 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2300            This value should be the same as the local size used in creating the
2301            x vector for the matrix-vector product y = Ax.
2302 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2303 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2304 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2305            submatrix  (same for all local rows)
2306 .  d_nnz - array containing the number of block nonzeros in the various block rows
2307            in the upper triangular portion of the in diagonal portion of the local
2308            (possibly different for each block block row) or NULL.
2309            If you plan to factor the matrix you must leave room for the diagonal entry and
2310            set its value even if it is zero.
2311 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2312            submatrix (same for all local rows).
2313 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2314            off-diagonal portion of the local submatrix (possibly different for
2315            each block row) or NULL.
2316 
2317    Output Parameter:
2318 .  A - the matrix
2319 
2320    Options Database Keys:
2321 .   -mat_no_unroll - uses code that does not unroll the loops in the
2322                      block calculations (much slower)
2323 .   -mat_block_size - size of the blocks to use
2324 .   -mat_mpi - use the parallel matrix data structures even on one processor
2325                (defaults to using SeqBAIJ format on one processor)
2326 
2327    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2328    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2329    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2330 
2331    Notes:
2332    The number of rows and columns must be divisible by blocksize.
2333    This matrix type does not support complex Hermitian operation.
2334 
2335    The user MUST specify either the local or global matrix dimensions
2336    (possibly both).
2337 
2338    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2339    than it must be used on all processors that share the object for that argument.
2340 
2341    If the *_nnz parameter is given then the *_nz parameter is ignored
2342 
2343    Storage Information:
2344    For a square global matrix we define each processor's diagonal portion
2345    to be its local rows and the corresponding columns (a square submatrix);
2346    each processor's off-diagonal portion encompasses the remainder of the
2347    local matrix (a rectangular submatrix).
2348 
2349    The user can specify preallocated storage for the diagonal part of
2350    the local submatrix with either d_nz or d_nnz (not both).  Set
2351    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2352    memory allocation.  Likewise, specify preallocated storage for the
2353    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2354 
2355    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2356    the figure below we depict these three local rows and all columns (0-11).
2357 
2358 .vb
2359            0 1 2 3 4 5 6 7 8 9 10 11
2360           --------------------------
2361    row 3  |. . . d d d o o o o  o  o
2362    row 4  |. . . d d d o o o o  o  o
2363    row 5  |. . . d d d o o o o  o  o
2364           --------------------------
2365 .ve
2366 
2367    Thus, any entries in the d locations are stored in the d (diagonal)
2368    submatrix, and any entries in the o locations are stored in the
2369    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2370    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2371 
2372    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2373    plus the diagonal part of the d matrix,
2374    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2375    In general, for PDE problems in which most nonzeros are near the diagonal,
2376    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2377    or you will get TERRIBLE performance; see the users' manual chapter on
2378    matrices.
2379 
2380    Level: intermediate
2381 
2382 .keywords: matrix, block, aij, compressed row, sparse, parallel
2383 
2384 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2385 @*/
2386 
2387 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2388 {
2389   PetscErrorCode ierr;
2390   PetscMPIInt    size;
2391 
2392   PetscFunctionBegin;
2393   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2394   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2395   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2396   if (size > 1) {
2397     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2398     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2399   } else {
2400     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2401     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2402   }
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 
2407 #undef __FUNCT__
2408 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2409 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2410 {
2411   Mat            mat;
2412   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2413   PetscErrorCode ierr;
2414   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2415   PetscScalar    *array;
2416 
2417   PetscFunctionBegin;
2418   *newmat = 0;
2419 
2420   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2421   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2422   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2423   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2424   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2425   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2426 
2427   mat->factortype   = matin->factortype;
2428   mat->preallocated = PETSC_TRUE;
2429   mat->assembled    = PETSC_TRUE;
2430   mat->insertmode   = NOT_SET_VALUES;
2431 
2432   a      = (Mat_MPISBAIJ*)mat->data;
2433   a->bs2 = oldmat->bs2;
2434   a->mbs = oldmat->mbs;
2435   a->nbs = oldmat->nbs;
2436   a->Mbs = oldmat->Mbs;
2437   a->Nbs = oldmat->Nbs;
2438 
2439 
2440   a->size         = oldmat->size;
2441   a->rank         = oldmat->rank;
2442   a->donotstash   = oldmat->donotstash;
2443   a->roworiented  = oldmat->roworiented;
2444   a->rowindices   = 0;
2445   a->rowvalues    = 0;
2446   a->getrowactive = PETSC_FALSE;
2447   a->barray       = 0;
2448   a->rstartbs     = oldmat->rstartbs;
2449   a->rendbs       = oldmat->rendbs;
2450   a->cstartbs     = oldmat->cstartbs;
2451   a->cendbs       = oldmat->cendbs;
2452 
2453   /* hash table stuff */
2454   a->ht           = 0;
2455   a->hd           = 0;
2456   a->ht_size      = 0;
2457   a->ht_flag      = oldmat->ht_flag;
2458   a->ht_fact      = oldmat->ht_fact;
2459   a->ht_total_ct  = 0;
2460   a->ht_insert_ct = 0;
2461 
2462   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2463   if (oldmat->colmap) {
2464 #if defined(PETSC_USE_CTABLE)
2465     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2466 #else
2467     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2468     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2469     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2470 #endif
2471   } else a->colmap = 0;
2472 
2473   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2474     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2475     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2476     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2477   } else a->garray = 0;
2478 
2479   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2480   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2481   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2482   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2483   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2484 
2485   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2486   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2487   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2488   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2489 
2490   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2491   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2492   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2493   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2494   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2495   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2496   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2497   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2498   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2499   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2500   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2501   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2502   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2503 
2504   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2505   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2506   a->sMvctx = oldmat->sMvctx;
2507   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2508 
2509   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2510   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2511   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2512   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2513   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2514   *newmat = mat;
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "MatLoad_MPISBAIJ"
2520 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2521 {
2522   PetscErrorCode ierr;
2523   PetscInt       i,nz,j,rstart,rend;
2524   PetscScalar    *vals,*buf;
2525   MPI_Comm       comm;
2526   MPI_Status     status;
2527   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2528   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2529   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2530   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2531   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2532   PetscInt       dcount,kmax,k,nzcount,tmp;
2533   int            fd;
2534 
2535   PetscFunctionBegin;
2536   /* force binary viewer to load .info file if it has not yet done so */
2537   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2538   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2539   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2540   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2541   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2542   if (bs < 0) bs = 1;
2543 
2544   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2545   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2546   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2547   if (!rank) {
2548     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2549     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2550     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2551   }
2552 
2553   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2554   M    = header[1];
2555   N    = header[2];
2556 
2557   /* If global sizes are set, check if they are consistent with that given in the file */
2558   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2559   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2560 
2561   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2562 
2563   /*
2564      This code adds extra rows to make sure the number of rows is
2565      divisible by the blocksize
2566   */
2567   Mbs        = M/bs;
2568   extra_rows = bs - M + bs*(Mbs);
2569   if (extra_rows == bs) extra_rows = 0;
2570   else                  Mbs++;
2571   if (extra_rows &&!rank) {
2572     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2573   }
2574 
2575   /* determine ownership of all rows */
2576   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2577     mbs = Mbs/size + ((Mbs % size) > rank);
2578     m   = mbs*bs;
2579   } else { /* User Set */
2580     m   = newmat->rmap->n;
2581     mbs = m/bs;
2582   }
2583   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2584   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2585   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2586   rowners[0] = 0;
2587   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2588   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2589   rstart = rowners[rank];
2590   rend   = rowners[rank+1];
2591 
2592   /* distribute row lengths to all processors */
2593   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2594   if (!rank) {
2595     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2596     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2597     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2598     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2599     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2600     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2601     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2602   } else {
2603     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2604   }
2605 
2606   if (!rank) {   /* procs[0] */
2607     /* calculate the number of nonzeros on each processor */
2608     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2609     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2610     for (i=0; i<size; i++) {
2611       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2612         procsnz[i] += rowlengths[j];
2613       }
2614     }
2615     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2616 
2617     /* determine max buffer needed and allocate it */
2618     maxnz = 0;
2619     for (i=0; i<size; i++) {
2620       maxnz = PetscMax(maxnz,procsnz[i]);
2621     }
2622     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2623 
2624     /* read in my part of the matrix column indices  */
2625     nz     = procsnz[0];
2626     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2627     mycols = ibuf;
2628     if (size == 1) nz -= extra_rows;
2629     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2630     if (size == 1) {
2631       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2632     }
2633 
2634     /* read in every ones (except the last) and ship off */
2635     for (i=1; i<size-1; i++) {
2636       nz   = procsnz[i];
2637       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2638       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2639     }
2640     /* read in the stuff for the last proc */
2641     if (size != 1) {
2642       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2643       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2644       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2645       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2646     }
2647     ierr = PetscFree(cols);CHKERRQ(ierr);
2648   } else {  /* procs[i], i>0 */
2649     /* determine buffer space needed for message */
2650     nz = 0;
2651     for (i=0; i<m; i++) nz += locrowlens[i];
2652     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2653     mycols = ibuf;
2654     /* receive message of column indices*/
2655     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2656     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2657     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2658   }
2659 
2660   /* loop over local rows, determining number of off diagonal entries */
2661   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2662   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2663   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2664   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2665   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2666   rowcount = 0;
2667   nzcount  = 0;
2668   for (i=0; i<mbs; i++) {
2669     dcount  = 0;
2670     odcount = 0;
2671     for (j=0; j<bs; j++) {
2672       kmax = locrowlens[rowcount];
2673       for (k=0; k<kmax; k++) {
2674         tmp = mycols[nzcount++]/bs; /* block col. index */
2675         if (!mask[tmp]) {
2676           mask[tmp] = 1;
2677           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2678           else masked1[dcount++] = tmp; /* entry in diag portion */
2679         }
2680       }
2681       rowcount++;
2682     }
2683 
2684     dlens[i]  = dcount;  /* d_nzz[i] */
2685     odlens[i] = odcount; /* o_nzz[i] */
2686 
2687     /* zero out the mask elements we set */
2688     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2689     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2690   }
2691   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2692   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2693   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2694 
2695   if (!rank) {
2696     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2697     /* read in my part of the matrix numerical values  */
2698     nz     = procsnz[0];
2699     vals   = buf;
2700     mycols = ibuf;
2701     if (size == 1) nz -= extra_rows;
2702     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2703     if (size == 1) {
2704       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2705     }
2706 
2707     /* insert into matrix */
2708     jj = rstart*bs;
2709     for (i=0; i<m; i++) {
2710       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2711       mycols += locrowlens[i];
2712       vals   += locrowlens[i];
2713       jj++;
2714     }
2715 
2716     /* read in other processors (except the last one) and ship out */
2717     for (i=1; i<size-1; i++) {
2718       nz   = procsnz[i];
2719       vals = buf;
2720       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2721       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2722     }
2723     /* the last proc */
2724     if (size != 1) {
2725       nz   = procsnz[i] - extra_rows;
2726       vals = buf;
2727       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2728       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2729       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2730     }
2731     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2732 
2733   } else {
2734     /* receive numeric values */
2735     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2736 
2737     /* receive message of values*/
2738     vals   = buf;
2739     mycols = ibuf;
2740     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2741     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2742     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2743 
2744     /* insert into matrix */
2745     jj = rstart*bs;
2746     for (i=0; i<m; i++) {
2747       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2748       mycols += locrowlens[i];
2749       vals   += locrowlens[i];
2750       jj++;
2751     }
2752   }
2753 
2754   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2755   ierr = PetscFree(buf);CHKERRQ(ierr);
2756   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2757   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2758   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2759   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2760   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2761   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2762   PetscFunctionReturn(0);
2763 }
2764 
2765 #undef __FUNCT__
2766 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2767 /*XXXXX@
2768    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2769 
2770    Input Parameters:
2771 .  mat  - the matrix
2772 .  fact - factor
2773 
2774    Not Collective on Mat, each process can have a different hash factor
2775 
2776    Level: advanced
2777 
2778   Notes:
2779    This can also be set by the command line option: -mat_use_hash_table fact
2780 
2781 .keywords: matrix, hashtable, factor, HT
2782 
2783 .seealso: MatSetOption()
2784 @XXXXX*/
2785 
2786 
2787 #undef __FUNCT__
2788 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2789 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2790 {
2791   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2792   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2793   PetscReal      atmp;
2794   PetscReal      *work,*svalues,*rvalues;
2795   PetscErrorCode ierr;
2796   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2797   PetscMPIInt    rank,size;
2798   PetscInt       *rowners_bs,dest,count,source;
2799   PetscScalar    *va;
2800   MatScalar      *ba;
2801   MPI_Status     stat;
2802 
2803   PetscFunctionBegin;
2804   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2805   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2806   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2807 
2808   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2809   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2810 
2811   bs  = A->rmap->bs;
2812   mbs = a->mbs;
2813   Mbs = a->Mbs;
2814   ba  = b->a;
2815   bi  = b->i;
2816   bj  = b->j;
2817 
2818   /* find ownerships */
2819   rowners_bs = A->rmap->range;
2820 
2821   /* each proc creates an array to be distributed */
2822   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2823   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2824 
2825   /* row_max for B */
2826   if (rank != size-1) {
2827     for (i=0; i<mbs; i++) {
2828       ncols = bi[1] - bi[0]; bi++;
2829       brow  = bs*i;
2830       for (j=0; j<ncols; j++) {
2831         bcol = bs*(*bj);
2832         for (kcol=0; kcol<bs; kcol++) {
2833           col  = bcol + kcol;                /* local col index */
2834           col += rowners_bs[rank+1];      /* global col index */
2835           for (krow=0; krow<bs; krow++) {
2836             atmp = PetscAbsScalar(*ba); ba++;
2837             row  = brow + krow;   /* local row index */
2838             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2839             if (work[col] < atmp) work[col] = atmp;
2840           }
2841         }
2842         bj++;
2843       }
2844     }
2845 
2846     /* send values to its owners */
2847     for (dest=rank+1; dest<size; dest++) {
2848       svalues = work + rowners_bs[dest];
2849       count   = rowners_bs[dest+1]-rowners_bs[dest];
2850       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2851     }
2852   }
2853 
2854   /* receive values */
2855   if (rank) {
2856     rvalues = work;
2857     count   = rowners_bs[rank+1]-rowners_bs[rank];
2858     for (source=0; source<rank; source++) {
2859       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2860       /* process values */
2861       for (i=0; i<count; i++) {
2862         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2863       }
2864     }
2865   }
2866 
2867   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2868   ierr = PetscFree(work);CHKERRQ(ierr);
2869   PetscFunctionReturn(0);
2870 }
2871 
2872 #undef __FUNCT__
2873 #define __FUNCT__ "MatSOR_MPISBAIJ"
2874 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2875 {
2876   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2877   PetscErrorCode    ierr;
2878   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2879   PetscScalar       *x,*ptr,*from;
2880   Vec               bb1;
2881   const PetscScalar *b;
2882 
2883   PetscFunctionBegin;
2884   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2885   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2886 
2887   if (flag == SOR_APPLY_UPPER) {
2888     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2889     PetscFunctionReturn(0);
2890   }
2891 
2892   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2893     if (flag & SOR_ZERO_INITIAL_GUESS) {
2894       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2895       its--;
2896     }
2897 
2898     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2899     while (its--) {
2900 
2901       /* lower triangular part: slvec0b = - B^T*xx */
2902       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2903 
2904       /* copy xx into slvec0a */
2905       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2906       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2907       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2908       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2909 
2910       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2911 
2912       /* copy bb into slvec1a */
2913       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2914       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2915       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2916       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2917 
2918       /* set slvec1b = 0 */
2919       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2920 
2921       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2922       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2923       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2924       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2925 
2926       /* upper triangular part: bb1 = bb1 - B*x */
2927       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2928 
2929       /* local diagonal sweep */
2930       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2931     }
2932     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2933   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2934     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2935   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2936     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2937   } else if (flag & SOR_EISENSTAT) {
2938     Vec               xx1;
2939     PetscBool         hasop;
2940     const PetscScalar *diag;
2941     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2942     PetscInt          i,n;
2943 
2944     if (!mat->xx1) {
2945       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2946       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2947     }
2948     xx1 = mat->xx1;
2949     bb1 = mat->bb1;
2950 
2951     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2952 
2953     if (!mat->diag) {
2954       /* this is wrong for same matrix with new nonzero values */
2955       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2956       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2957     }
2958     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2959 
2960     if (hasop) {
2961       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2962       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2963     } else {
2964       /*
2965           These two lines are replaced by code that may be a bit faster for a good compiler
2966       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2967       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2968       */
2969       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2970       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2971       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2972       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2973       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2974       if (omega == 1.0) {
2975         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2976         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2977       } else {
2978         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2979         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2980       }
2981       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2982       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2983       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2984       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2985     }
2986 
2987     /* multiply off-diagonal portion of matrix */
2988     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2989     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2990     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2991     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2992     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2993     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2994     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2995     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2996     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2997     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2998 
2999     /* local sweep */
3000     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
3001     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
3002   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3003   PetscFunctionReturn(0);
3004 }
3005 
3006 #undef __FUNCT__
3007 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
3008 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3009 {
3010   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
3011   PetscErrorCode ierr;
3012   Vec            lvec1,bb1;
3013 
3014   PetscFunctionBegin;
3015   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3016   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3017 
3018   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3019     if (flag & SOR_ZERO_INITIAL_GUESS) {
3020       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3021       its--;
3022     }
3023 
3024     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
3025     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3026     while (its--) {
3027       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3028 
3029       /* lower diagonal part: bb1 = bb - B^T*xx */
3030       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3031       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3032 
3033       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3034       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3035       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3036 
3037       /* upper diagonal part: bb1 = bb1 - B*x */
3038       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3039       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3040 
3041       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3042 
3043       /* diagonal sweep */
3044       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3045     }
3046     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3047     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3048   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3049   PetscFunctionReturn(0);
3050 }
3051 
3052 #undef __FUNCT__
3053 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
3054 /*@
3055      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
3056          CSR format the local rows.
3057 
3058    Collective on MPI_Comm
3059 
3060    Input Parameters:
3061 +  comm - MPI communicator
3062 .  bs - the block size, only a block size of 1 is supported
3063 .  m - number of local rows (Cannot be PETSC_DECIDE)
3064 .  n - This value should be the same as the local size used in creating the
3065        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3066        calculated if N is given) For square matrices n is almost always m.
3067 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3068 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3069 .   i - row indices
3070 .   j - column indices
3071 -   a - matrix values
3072 
3073    Output Parameter:
3074 .   mat - the matrix
3075 
3076    Level: intermediate
3077 
3078    Notes:
3079        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3080      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3081      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3082 
3083        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3084 
3085 .keywords: matrix, aij, compressed row, sparse, parallel
3086 
3087 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3088           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3089 @*/
3090 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3091 {
3092   PetscErrorCode ierr;
3093 
3094 
3095   PetscFunctionBegin;
3096   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3097   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3098   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3099   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3100   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3101   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3102   PetscFunctionReturn(0);
3103 }
3104 
3105 
3106 #undef __FUNCT__
3107 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
3108 /*@C
3109    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
3110    (the default parallel PETSc format).
3111 
3112    Collective on MPI_Comm
3113 
3114    Input Parameters:
3115 +  B - the matrix
3116 .  bs - the block size
3117 .  i - the indices into j for the start of each local row (starts with zero)
3118 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3119 -  v - optional values in the matrix
3120 
3121    Level: developer
3122 
3123 .keywords: matrix, aij, compressed row, sparse, parallel
3124 
3125 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3126 @*/
3127 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3128 {
3129   PetscErrorCode ierr;
3130 
3131   PetscFunctionBegin;
3132   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3133   PetscFunctionReturn(0);
3134 }
3135 
3136 #undef __FUNCT__
3137 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
3138 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3139 {
3140   PetscErrorCode ierr;
3141   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3142   PetscInt       *indx;
3143   PetscScalar    *values;
3144 
3145   PetscFunctionBegin;
3146   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3147   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3148     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3149     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3150     PetscInt       *bindx,rmax=a->rmax,j;
3151 
3152     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3153     mbs = m/bs; Nbs = N/cbs;
3154     if (n == PETSC_DECIDE) {
3155       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3156     }
3157     /* Check sum(n) = Nbs */
3158     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3159     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
3160 
3161     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3162     rstart -= mbs;
3163 
3164     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3165     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3166     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3167     for (i=0; i<mbs; i++) {
3168       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3169       nnz = nnz/bs;
3170       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3171       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3172       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3173     }
3174     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3175     ierr = PetscFree(bindx);CHKERRQ(ierr);
3176 
3177     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3178     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3179     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3180     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3181     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3182     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3183   }
3184 
3185   /* numeric phase */
3186   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3187   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3188 
3189   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3190   for (i=0; i<m; i++) {
3191     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3192     Ii   = i + rstart;
3193     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3194     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3195   }
3196   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3197   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3198   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3199   PetscFunctionReturn(0);
3200 }
3201