/* /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 4035e84d3faf4a20d1b4a222249bd7c04df3e9ca) */
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55   { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60     bcol = col/bs; \
61     ridx = row % bs; cidx = col % bs; \
62     low  = 0; high = nrow; \
63     while (high-low > 3) { \
64       t = (low+high)/2; \
65       if (rp[t] > bcol) high = t; \
66       else              low  = t; \
67     } \
68     for (_i=low; _i<high; _i++) { \
69       if (rp[_i] > bcol) break; \
70       if (rp[_i] == bcol) { \
71         bap = ap + bs2*_i + bs*cidx + ridx; \
72         if (addv == ADD_VALUES) *bap += value;  \
73         else                    *bap  = value;  \
74         goto a_noinsert; \
75       } \
76     } \
77     if (a->nonew == 1) goto a_noinsert; \
78     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80     N = nrow++ - 1;  \
81     /* shift up all the later entries in this row */ \
82     for (ii=N; ii>=_i; ii--) { \
83       rp[ii+1] = rp[ii]; \
84       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85     } \
86     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87     rp[_i]                      = bcol;  \
88     ap[bs2*_i + bs*cidx + ridx] = value;  \
89 a_noinsert:; \
90     ailen[brow] = nrow; \
91   }
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94   { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98     bcol = col/bs; \
99     ridx = row % bs; cidx = col % bs; \
100     low  = 0; high = nrow; \
101     while (high-low > 3) { \
102       t = (low+high)/2; \
103       if (rp[t] > bcol) high = t; \
104       else              low  = t; \
105     } \
106     for (_i=low; _i<high; _i++) { \
107       if (rp[_i] > bcol) break; \
108       if (rp[_i] == bcol) { \
109         bap = ap + bs2*_i + bs*cidx + ridx; \
110         if (addv == ADD_VALUES) *bap += value;  \
111         else                    *bap  = value;  \
112         goto b_noinsert; \
113       } \
114     } \
115     if (b->nonew == 1) goto b_noinsert; \
116     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118     N = nrow++ - 1;  \
119     /* shift up all the later entries in this row */ \
120     for (ii=N; ii>=_i; ii--) { \
121       rp[ii+1] = rp[ii]; \
122       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123     } \
124     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125     rp[_i]                      = bcol;  \
126     ap[bs2*_i + bs*cidx + ridx] = value;  \
127 b_noinsert:; \
128     bilen[brow] = nrow; \
129   }
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by the user is ignored.
133 */
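/*
   Usage sketch (not part of this file; "mat" is an assumed, already preallocated
   MATMPISBAIJ with block size 1): only the upper triangle is passed to MatSetValues().
   With MAT_IGNORE_LOWER_TRIANGULAR set, any (i,j) with i>j is silently dropped instead
   of raising an error.

     PetscInt    rows[2] = {0,1},cols[2] = {0,1};
     PetscScalar vals[4] = {4.0,1.0,
                            1.0,3.0};
     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);   the (1,0) entry is ignored
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/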
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some Variables required in the macro */
148   Mat          A     = baij->A;
149   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar    *aa   =a->a;
152 
153   Mat         B     = baij->B;
154   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar   *ba   =b->a;
157 
158   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar *ap,*bap;
161 
162   /* for stash */
163   PetscInt  n_loc, *in_loc = NULL;
164   MatScalar *v_loc = NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash) {
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174 
175       baij->n_loc = n;
176     }
177     in_loc = baij->in_loc;
178     v_loc  = baij->v_loc;
179   }
180 
181   for (i=0; i<m; i++) {
182     if (im[i] < 0) continue;
183 #if defined(PETSC_USE_DEBUG)
184     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
185 #endif
186     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
187       row = im[i] - rstart_orig;              /* local row index */
188       for (j=0; j<n; j++) {
189         if (im[i]/bs > in[j]/bs) {
190           if (a->ignore_ltriangular) {
191             continue;    /* ignore lower triangular blocks */
192           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193         }
194         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
195           col  = in[j] - cstart_orig;         /* local col index */
196           brow = row/bs; bcol = col/bs;
197           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
198           if (roworiented) value = v[i*n+j];
199           else             value = v[i+j*m];
200           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
201           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
202         } else if (in[j] < 0) continue;
203 #if defined(PETSC_USE_DEBUG)
204         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
205 #endif
206         else {  /* off-diag entry (B) */
207           if (mat->was_assembled) {
208             if (!baij->colmap) {
209               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
210             }
211 #if defined(PETSC_USE_CTABLE)
212             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
213             col  = col - 1;
214 #else
215             col = baij->colmap[in[j]/bs] - 1;
216 #endif
217             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
218               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
219               col  =  in[j];
220               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
221               B    = baij->B;
222               b    = (Mat_SeqBAIJ*)(B)->data;
223               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
224               ba   = b->a;
225             } else col += in[j]%bs;
226           } else col = in[j];
227           if (roworiented) value = v[i*n+j];
228           else             value = v[i+j*m];
229           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
230           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
231         }
232       }
233     } else {  /* off processor entry */
234       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
235       if (!baij->donotstash) {
236         mat->assembled = PETSC_FALSE;
237         n_loc          = 0;
238         for (j=0; j<n; j++) {
239           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
240           in_loc[n_loc] = in[j];
241           if (roworiented) {
242             v_loc[n_loc] = v[i*n+j];
243           } else {
244             v_loc[n_loc] = v[j*m+i];
245           }
246           n_loc++;
247         }
248         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
249       }
250     }
251   }
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
257 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
258 {
259   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
260   const MatScalar *value;
261   MatScalar       *barray     =baij->barray;
262   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
263   PetscErrorCode  ierr;
264   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
265   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
266   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
267 
268   PetscFunctionBegin;
269   if (!barray) {
270     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
271     baij->barray = barray;
272   }
273 
274   if (roworiented) {
275     stepval = (n-1)*bs;
276   } else {
277     stepval = (m-1)*bs;
278   }
279   for (i=0; i<m; i++) {
280     if (im[i] < 0) continue;
281 #if defined(PETSC_USE_DEBUG)
282     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
283 #endif
284     if (im[i] >= rstart && im[i] < rend) {
285       row = im[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (im[i] > in[j]) {
288           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
289           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
290         }
291         /* If NumCol = 1 then a copy is not required */
292         if ((roworiented) && (n == 1)) {
293           barray = (MatScalar*) v + i*bs2;
294         } else if ((!roworiented) && (m == 1)) {
295           barray = (MatScalar*) v + j*bs2;
296         } else { /* Here a copy is required */
297           if (roworiented) {
298             value = v + i*(stepval+bs)*bs + j*bs;
299           } else {
300             value = v + j*(stepval+bs)*bs + i*bs;
301           }
302           for (ii=0; ii<bs; ii++,value+=stepval) {
303             for (jj=0; jj<bs; jj++) {
304               *barray++ = *value++;
305             }
306           }
307           barray -=bs2;
308         }
309 
310         if (in[j] >= cstart && in[j] < cend) {
311           col  = in[j] - cstart;
312           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
313         } else if (in[j] < 0) continue;
314 #if defined(PETSC_USE_DEBUG)
315         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
316 #endif
317         else {
318           if (mat->was_assembled) {
319             if (!baij->colmap) {
320               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
321             }
322 
323 #if defined(PETSC_USE_DEBUG)
324 #if defined(PETSC_USE_CTABLE)
325             { PetscInt data;
326               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
327               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
328             }
329 #else
330             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
331 #endif
332 #endif
333 #if defined(PETSC_USE_CTABLE)
334             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
335             col  = (col - 1)/bs;
336 #else
337             col = (baij->colmap[in[j]] - 1)/bs;
338 #endif
339             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
340               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
341               col  = in[j];
342             }
343           } else col = in[j];
344           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
345         }
346       }
347     } else {
348       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
349       if (!baij->donotstash) {
350         if (roworiented) {
351           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
352         } else {
353           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
354         }
355       }
356     }
357   }
358   PetscFunctionReturn(0);
359 }
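/*
   Usage sketch (assumptions: "mat" is an already preallocated MATMPISBAIJ with block
   size 2 and block row 0 is owned by this process): MatSetValuesBlocked() takes block
   indices and one dense bs*bs array per block, row oriented by default.  Blocks with
   block row greater than block column are ignored or rejected exactly as in
   MatSetValues().

     PetscInt    brow = 0,bcol = 1;
     PetscScalar block[4] = {1.0,2.0,
                             3.0,4.0};
     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,ADD_VALUES);CHKERRQ(ierr);
*/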
360 
361 #undef __FUNCT__
362 #define __FUNCT__ "MatGetValues_MPISBAIJ"
363 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
364 {
365   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
366   PetscErrorCode ierr;
367   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
368   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
369 
370   PetscFunctionBegin;
371   for (i=0; i<m; i++) {
372     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
373     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
374     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
375       row = idxm[i] - bsrstart;
376       for (j=0; j<n; j++) {
377         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
378         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
379         if (idxn[j] >= bscstart && idxn[j] < bscend) {
380           col  = idxn[j] - bscstart;
381           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
382         } else {
383           if (!baij->colmap) {
384             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
385           }
386 #if defined(PETSC_USE_CTABLE)
387           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
388           data--;
389 #else
390           data = baij->colmap[idxn[j]/bs]-1;
391 #endif
392           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
393           else {
394             col  = data + idxn[j]%bs;
395             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
396           }
397         }
398       }
399     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
400   }
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNCT__
405 #define __FUNCT__ "MatNorm_MPISBAIJ"
406 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
407 {
408   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscReal      sum[2],*lnorm2;
411 
412   PetscFunctionBegin;
413   if (baij->size == 1) {
414     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
415   } else {
416     if (type == NORM_FROBENIUS) {
417       ierr    = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
418       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
419       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
420       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
421       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
422       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
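      /* The strictly off-diagonal coupling blocks (B) are stored only once; their
         unstored symmetric counterparts contribute the same amount to the Frobenius
         norm, hence ||mat||_F = sqrt(sum[0] + 2*sum[1]) below, where sum[0] and sum[1]
         hold the reduced squared norms of the A and B parts respectively. */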
423       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
424       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
425     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
426       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
427       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
428       PetscReal    *rsum,*rsum2,vabs;
429       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
430       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
431       MatScalar    *v;
432 
433       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
434       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
435       /* Amat */
436       v = amat->a; jj = amat->j;
437       for (brow=0; brow<mbs; brow++) {
438         grow = bs*(rstart + brow);
439         nz   = amat->i[brow+1] - amat->i[brow];
440         for (bcol=0; bcol<nz; bcol++) {
441           gcol = bs*(rstart + *jj); jj++;
442           for (col=0; col<bs; col++) {
443             for (row=0; row<bs; row++) {
444               vabs            = PetscAbsScalar(*v); v++;
445               rsum[gcol+col] += vabs;
446               /* non-diagonal block */
447               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
448             }
449           }
450         }
451       }
452       /* Bmat */
453       v = bmat->a; jj = bmat->j;
454       for (brow=0; brow<mbs; brow++) {
455         grow = bs*(rstart + brow);
456         nz = bmat->i[brow+1] - bmat->i[brow];
457         for (bcol=0; bcol<nz; bcol++) {
458           gcol = bs*garray[*jj]; jj++;
459           for (col=0; col<bs; col++) {
460             for (row=0; row<bs; row++) {
461               vabs            = PetscAbsScalar(*v); v++;
462               rsum[gcol+col] += vabs;
463               rsum[grow+row] += vabs;
464             }
465           }
466         }
467       }
468       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
469       *norm = 0.0;
470       for (col=0; col<mat->cmap->N; col++) {
471         if (rsum2[col] > *norm) *norm = rsum2[col];
472       }
473       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
474     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
475   }
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
481 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
484   PetscErrorCode ierr;
485   PetscInt       nstash,reallocs;
486   InsertMode     addv;
487 
488   PetscFunctionBegin;
489   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
490 
491   /* make sure all processors are either in INSERTMODE or ADDMODE */
492   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
493   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
494   mat->insertmode = addv; /* in case this processor had no cache */
495 
496   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
497   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
498   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
499   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
500   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
501   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
502   PetscFunctionReturn(0);
503 }
504 
505 #undef __FUNCT__
506 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
507 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
508 {
509   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
510   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
511   PetscErrorCode ierr;
512   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
513   PetscInt       *row,*col;
514   PetscBool      other_disassembled;
515   PetscMPIInt    n;
516   PetscBool      r1,r2,r3;
517   MatScalar      *val;
518   InsertMode     addv = mat->insertmode;
519 
520   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
521   PetscFunctionBegin;
522   if (!baij->donotstash &&  !mat->nooffprocentries) {
523     while (1) {
524       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
525       if (!flg) break;
526 
527       for (i=0; i<n;) {
528         /* Now identify the consecutive vals belonging to the same row */
529         for (j=i,rstart=row[j]; j<n; j++) {
530           if (row[j] != rstart) break;
531         }
532         if (j < n) ncols = j-i;
533         else       ncols = n-i;
534         /* Now assemble all these values with a single function call */
535         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
536         i    = j;
537       }
538     }
539     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
540     /* Now process the block-stash. Since the values are stashed column-oriented,
541        set the roworiented flag to column oriented, and after MatSetValues()
542        restore the original flags */
543     r1 = baij->roworiented;
544     r2 = a->roworiented;
545     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
546 
547     baij->roworiented = PETSC_FALSE;
548     a->roworiented    = PETSC_FALSE;
549 
550     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
551     while (1) {
552       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
553       if (!flg) break;
554 
555       for (i=0; i<n;) {
556         /* Now identify the consecutive vals belonging to the same row */
557         for (j=i,rstart=row[j]; j<n; j++) {
558           if (row[j] != rstart) break;
559         }
560         if (j < n) ncols = j-i;
561         else       ncols = n-i;
562         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
563         i    = j;
564       }
565     }
566     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
567 
568     baij->roworiented = r1;
569     a->roworiented    = r2;
570 
571     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
572   }
573 
574   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
575   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
576 
577   /* determine if any processor has disassembled, if so we must
578      also disassemble ourselves so that we can reassemble. */
579   /*
580      if the nonzero structure of submatrix B cannot change then we know that
581      no processor disassembled, so we can skip this step
582   */
583   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
584     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
585     if (mat->was_assembled && !other_disassembled) {
586       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
587     }
588   }
589 
590   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
591     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
592   }
593   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
594   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
595   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
596 
597   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
598 
599   baij->rowvalues = 0;
600   PetscFunctionReturn(0);
601 }
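/*
   Illustration of the stash machinery handled above (a sketch; "mat" is an assumed
   MATMPISBAIJ distributed over at least two processes and "grow" is a hypothetical
   global row owned by another process): values set off-process are kept in mat->stash
   by MatSetValues_MPISBAIJ() and are delivered to their owners during the
   MatAssemblyBegin()/MatAssemblyEnd() pair.

     PetscScalar one = 1.0;
     ierr = MatSetValues(mat,1,&grow,1,&grow,&one,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/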
602 
603 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
604 #include <petscdraw.h>
605 #undef __FUNCT__
606 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
607 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
608 {
609   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
610   PetscErrorCode    ierr;
611   PetscInt          bs   = mat->rmap->bs;
612   PetscMPIInt       size = baij->size,rank = baij->rank;
613   PetscBool         iascii,isdraw;
614   PetscViewer       sviewer;
615   PetscViewerFormat format;
616 
617   PetscFunctionBegin;
618   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
619   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
620   if (iascii) {
621     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
622     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
623       MatInfo info;
624       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
625       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
626       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
628       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
630       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
632       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
633       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
634       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
635       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
636       PetscFunctionReturn(0);
637     } else if (format == PETSC_VIEWER_ASCII_INFO) {
638       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
639       PetscFunctionReturn(0);
640     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
641       PetscFunctionReturn(0);
642     }
643   }
644 
645   if (isdraw) {
646     PetscDraw draw;
647     PetscBool isnull;
648     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
649     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
650   }
651 
652   if (size == 1) {
653     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
654     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
655   } else {
656     /* assemble the entire matrix onto first processor. */
657     Mat          A;
658     Mat_SeqSBAIJ *Aloc;
659     Mat_SeqBAIJ  *Bloc;
660     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
661     MatScalar    *a;
662 
663     /* Should this be the same type as mat? */
664     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
665     if (!rank) {
666       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
667     } else {
668       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
669     }
670     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
671     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
672     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
673     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
674 
675     /* copy over the A part */
676     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
677     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
678     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
679 
680     for (i=0; i<mbs; i++) {
681       rvals[0] = bs*(baij->rstartbs + i);
682       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
683       for (j=ai[i]; j<ai[i+1]; j++) {
684         col = (baij->cstartbs+aj[j])*bs;
685         for (k=0; k<bs; k++) {
686           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
687           col++;
688           a += bs;
689         }
690       }
691     }
692     /* copy over the B part */
693     Bloc = (Mat_SeqBAIJ*)baij->B->data;
694     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
695     for (i=0; i<mbs; i++) {
696 
697       rvals[0] = bs*(baij->rstartbs + i);
698       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
699       for (j=ai[i]; j<ai[i+1]; j++) {
700         col = baij->garray[aj[j]]*bs;
701         for (k=0; k<bs; k++) {
702           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
703           col++;
704           a += bs;
705         }
706       }
707     }
708     ierr = PetscFree(rvals);CHKERRQ(ierr);
709     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
710     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
711     /*
712        Everyone has to call to draw the matrix since the graphics waits are
713        synchronized across all processors that share the PetscDraw object
714     */
715     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
716     if (!rank) {
717       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
718       /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII() */
719       ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr);
720       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
721     }
722     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
723     ierr = MatDestroy(&A);CHKERRQ(ierr);
724   }
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNCT__
729 #define __FUNCT__ "MatView_MPISBAIJ"
730 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
731 {
732   PetscErrorCode ierr;
733   PetscBool      iascii,isdraw,issocket,isbinary;
734 
735   PetscFunctionBegin;
736   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
737   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
739   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
740   if (iascii || isdraw || issocket || isbinary) {
741     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
742   }
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatDestroy_MPISBAIJ"
748 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
749 {
750   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
751   PetscErrorCode ierr;
752 
753   PetscFunctionBegin;
754 #if defined(PETSC_USE_LOG)
755   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
756 #endif
757   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
758   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
759   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
760   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
761 #if defined(PETSC_USE_CTABLE)
762   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
763 #else
764   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
765 #endif
766   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
767   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
768   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
773   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
774   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
775   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
776   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
777   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
778   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
779   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
780   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
781 #if defined(PETSC_USE_REAL_MAT_SINGLE)
782   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
783 #endif
784   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
785   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
786   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
787   ierr = PetscFree(mat->data);CHKERRQ(ierr);
788 
789   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
794   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",NULL);CHKERRQ(ierr);
795   PetscFunctionReturn(0);
796 }
797 
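/*
   The MatMult implementations below exploit the symmetric storage with a single
   scatter: slvec0 holds [x ; B^T*x] (the local x followed by the transpose
   contribution of the off-diagonal part), slvec1 receives [A*x accumulated with the
   remote B^T*x pieces ; the ghost values of x], and the final y = B*slvec1b + slvec1a
   adds the superdiagonal part.  The Hermitian variant uses the conjugate transpose of
   B instead of the transpose.
*/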
798 #undef __FUNCT__
799 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
800 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
801 {
802   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
805   PetscScalar    *x,*from;
806 
807   PetscFunctionBegin;
808   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
809   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
810 
811   /* diagonal part */
812   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
813   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
814 
815   /* subdiagonal part */
816   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
817 
818   /* copy x into the vec slvec0 */
819   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
820   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
821 
822   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
823   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
824   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
825 
826   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828   /* superdiagonal part */
829   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "MatMult_MPISBAIJ"
835 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
836 {
837   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
838   PetscErrorCode ierr;
839   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
840   PetscScalar    *x,*from;
841 
842   PetscFunctionBegin;
843   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
844   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
845 
846   /* diagonal part */
847   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
848   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
849 
850   /* subdiagonal part */
851   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
852 
853   /* copy x into the vec slvec0 */
854   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
855   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
856 
857   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
858   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
859   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
860 
861   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863   /* superdiagonal part */
864   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
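/*
   Usage sketch (assuming "A" is an assembled MATMPISBAIJ and "x","y" are conforming
   parallel vectors): the full symmetric product y = A*x is formed even though only the
   upper triangle is stored.

     ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/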
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
870 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
871 {
872   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
873   PetscErrorCode ierr;
874   PetscInt       nt;
875 
876   PetscFunctionBegin;
877   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
878   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
879 
880   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
881   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
882 
883   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
884   /* do diagonal part */
885   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
886   /* do superdiagonal part */
887   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
888   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
889   /* do subdiagonal part */
890   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
891   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
892   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
893   PetscFunctionReturn(0);
894 }
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
898 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
899 {
900   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
901   PetscErrorCode ierr;
902   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
903   PetscScalar    *x,*from,zero=0.0;
904 
905   PetscFunctionBegin;
906   /*
907   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
908   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A));
909   */
910   /* diagonal part */
911   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
912   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
913 
914   /* subdiagonal part */
915   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
916 
917   /* copy x into the vec slvec0 */
918   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
919   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
920   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
921   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
922 
923   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926 
927   /* superdiagonal part */
928   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
929   PetscFunctionReturn(0);
930 }
931 
932 #undef __FUNCT__
933 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
934 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
935 {
936   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
937   PetscErrorCode ierr;
938 
939   PetscFunctionBegin;
940   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941   /* do diagonal part */
942   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
943   /* do superdiagonal part */
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
946 
947   /* do subdiagonal part */
948   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
949   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
950   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
951   PetscFunctionReturn(0);
952 }
953 
954 /*
955   This only works correctly for square matrices where the subblock A->A is the
956    diagonal block
957 */
958 #undef __FUNCT__
959 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
960 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
961 {
962   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
963   PetscErrorCode ierr;
964 
965   PetscFunctionBegin;
966   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
967   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatScale_MPISBAIJ"
973 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
974 {
975   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
980   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatGetRow_MPISBAIJ"
986 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
987 {
988   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
989   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
990   PetscErrorCode ierr;
991   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
992   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
993   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
994 
995   PetscFunctionBegin;
996   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
997   mat->getrowactive = PETSC_TRUE;
998 
999   if (!mat->rowvalues && (idx || v)) {
1000     /*
1001         allocate enough space to hold information from the longest row.
1002     */
1003     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1004     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1005     PetscInt     max = 1,mbs = mat->mbs,tmp;
1006     for (i=0; i<mbs; i++) {
1007       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1008       if (max < tmp) max = tmp;
1009     }
1010     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1011   }
1012 
1013   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1014   lrow = row - brstart;  /* local row index */
1015 
1016   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1017   if (!v)   {pvA = 0; pvB = 0;}
1018   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1019   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1020   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1021   nztot = nzA + nzB;
1022 
1023   cmap = mat->garray;
1024   if (v  || idx) {
1025     if (nztot) {
1026       /* Sort by increasing column numbers, assuming A and B already sorted */
1027       PetscInt imark = -1;
1028       if (v) {
1029         *v = v_p = mat->rowvalues;
1030         for (i=0; i<nzB; i++) {
1031           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1032           else break;
1033         }
1034         imark = i;
1035         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1036         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1037       }
1038       if (idx) {
1039         *idx = idx_p = mat->rowindices;
1040         if (imark > -1) {
1041           for (i=0; i<imark; i++) {
1042             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1043           }
1044         } else {
1045           for (i=0; i<nzB; i++) {
1046             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1047             else break;
1048           }
1049           imark = i;
1050         }
1051         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1052         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1053       }
1054     } else {
1055       if (idx) *idx = 0;
1056       if (v)   *v   = 0;
1057     }
1058   }
1059   *nz  = nztot;
1060   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1061   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1067 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1068 {
1069   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1070 
1071   PetscFunctionBegin;
1072   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1073   baij->getrowactive = PETSC_FALSE;
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1079 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1080 {
1081   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1082   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1083 
1084   PetscFunctionBegin;
1085   aA->getrow_utriangular = PETSC_TRUE;
1086   PetscFunctionReturn(0);
1087 }
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1090 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1091 {
1092   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1093   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1094 
1095   PetscFunctionBegin;
1096   aA->getrow_utriangular = PETSC_FALSE;
1097   PetscFunctionReturn(0);
1098 }
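/*
   A sketch of the calling sequence these two routines support (assuming "mat" is an
   assembled MATMPISBAIJ and "row" is a hypothetical global row owned by this process):
   the upper-triangular flag must be raised before MatGetRow() is used on an SBAIJ
   matrix.

     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ... use the ncols column indices and values of the stored (upper) part of the row ...
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/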
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1102 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1103 {
1104   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1109   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1115 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1116 {
1117   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1118   PetscErrorCode ierr;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1122   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1128 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1129 {
1130   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1131   PetscErrorCode ierr;
1132 
1133   PetscFunctionBegin;
1134   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1135   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1141 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1142 {
1143   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1144   Mat            A  = a->A,B = a->B;
1145   PetscErrorCode ierr;
1146   PetscReal      isend[5],irecv[5];
1147 
1148   PetscFunctionBegin;
1149   info->block_size = (PetscReal)matin->rmap->bs;
1150 
1151   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1152 
1153   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1154   isend[3] = info->memory;  isend[4] = info->mallocs;
1155 
1156   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1157 
1158   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1159   isend[3] += info->memory;  isend[4] += info->mallocs;
1160   if (flag == MAT_LOCAL) {
1161     info->nz_used      = isend[0];
1162     info->nz_allocated = isend[1];
1163     info->nz_unneeded  = isend[2];
1164     info->memory       = isend[3];
1165     info->mallocs      = isend[4];
1166   } else if (flag == MAT_GLOBAL_MAX) {
1167     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1168 
1169     info->nz_used      = irecv[0];
1170     info->nz_allocated = irecv[1];
1171     info->nz_unneeded  = irecv[2];
1172     info->memory       = irecv[3];
1173     info->mallocs      = irecv[4];
1174   } else if (flag == MAT_GLOBAL_SUM) {
1175     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1176 
1177     info->nz_used      = irecv[0];
1178     info->nz_allocated = irecv[1];
1179     info->nz_unneeded  = irecv[2];
1180     info->memory       = irecv[3];
1181     info->mallocs      = irecv[4];
1182   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1183   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1184   info->fill_ratio_needed = 0;
1185   info->factor_mallocs    = 0;
1186   PetscFunctionReturn(0);
1187 }
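/*
   Usage sketch (assuming "mat" is an assembled MATMPISBAIJ): global nonzero counts are
   obtained by reducing the per-process values assembled above.

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g\n",(double)info.nz_used);CHKERRQ(ierr);
*/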
1188 
1189 #undef __FUNCT__
1190 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1191 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1192 {
1193   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1194   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   switch (op) {
1199   case MAT_NEW_NONZERO_LOCATIONS:
1200   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1201   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1202   case MAT_KEEP_NONZERO_PATTERN:
1203   case MAT_NEW_NONZERO_LOCATION_ERR:
1204     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1205     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1206     break;
1207   case MAT_ROW_ORIENTED:
1208     a->roworiented = flg;
1209 
1210     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1211     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1212     break;
1213   case MAT_NEW_DIAGONALS:
1214     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1215     break;
1216   case MAT_IGNORE_OFF_PROC_ENTRIES:
1217     a->donotstash = flg;
1218     break;
1219   case MAT_USE_HASH_TABLE:
1220     a->ht_flag = flg;
1221     break;
1222   case MAT_HERMITIAN:
1223     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1224     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1225 
1226     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1227     break;
1228   case MAT_SPD:
1229     A->spd_set = PETSC_TRUE;
1230     A->spd     = flg;
1231     if (flg) {
1232       A->symmetric                  = PETSC_TRUE;
1233       A->structurally_symmetric     = PETSC_TRUE;
1234       A->symmetric_set              = PETSC_TRUE;
1235       A->structurally_symmetric_set = PETSC_TRUE;
1236     }
1237     break;
1238   case MAT_SYMMETRIC:
1239     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1240     break;
1241   case MAT_STRUCTURALLY_SYMMETRIC:
1242     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1243     break;
1244   case MAT_SYMMETRY_ETERNAL:
1245     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1246     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1247     break;
1248   case MAT_IGNORE_LOWER_TRIANGULAR:
1249     aA->ignore_ltriangular = flg;
1250     break;
1251   case MAT_ERROR_LOWER_TRIANGULAR:
1252     aA->ignore_ltriangular = flg;
1253     break;
1254   case MAT_GETROW_UPPERTRIANGULAR:
1255     aA->getrow_utriangular = flg;
1256     break;
1257   default:
1258     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1259   }
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1265 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1266 {
1267   PetscErrorCode ierr;
1268 
1269   PetscFunctionBegin;
1270   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1271     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1278 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1279 {
1280   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1281   Mat            a     = baij->A, b=baij->B;
1282   PetscErrorCode ierr;
1283   PetscInt       nv,m,n;
1284   PetscBool      flg;
1285 
1286   PetscFunctionBegin;
1287   if (ll != rr) {
1288     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1289     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1290   }
1291   if (!ll) PetscFunctionReturn(0);
1292 
1293   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1294   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1295 
1296   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1297   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local size");
1298 
1299   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1300 
1301   /* left diagonalscale the off-diagonal part */
1302   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1303 
1304   /* scale the diagonal part */
1305   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1306 
1307   /* right diagonalscale the off-diagonal part */
1308   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1309   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
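/*
   Sketch of the only scaling supported by the symmetric format (assuming "mat" is a
   MATMPISBAIJ and "d" is a vector conforming to its local row layout): the same vector
   must be passed on both sides, producing D*A*D.

     ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);
*/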
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1315 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1316 {
1317   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1318   PetscErrorCode ierr;
1319 
1320   PetscFunctionBegin;
1321   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatEqual_MPISBAIJ"
1329 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1330 {
1331   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1332   Mat            a,b,c,d;
1333   PetscBool      flg;
1334   PetscErrorCode ierr;
1335 
1336   PetscFunctionBegin;
1337   a = matA->A; b = matA->B;
1338   c = matB->A; d = matB->B;
1339 
1340   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1341   if (flg) {
1342     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1343   }
1344   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatCopy_MPISBAIJ"
1350 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1351 {
1352   PetscErrorCode ierr;
1353   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1354   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1355 
1356   PetscFunctionBegin;
1357   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1358   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1359     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1360     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1361     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1362   } else {
1363     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1364     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1365   }
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1371 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1372 {
1373   PetscErrorCode ierr;
1374 
1375   PetscFunctionBegin;
1376   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1382 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1383 {
1384   PetscErrorCode ierr;
1385   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1386   PetscBLASInt   bnz,one=1;
1387   Mat_SeqSBAIJ   *xa,*ya;
1388   Mat_SeqBAIJ    *xb,*yb;
1389 
1390   PetscFunctionBegin;
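       /* When X and Y share the same nonzero pattern, Y += a*X is applied directly to the stored
          value arrays of the diagonal (SeqSBAIJ) and off-diagonal (SeqBAIJ) blocks with BLAS axpy */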
1391   if (str == SAME_NONZERO_PATTERN) {
1392     PetscScalar alpha = a;
1393     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1394     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1395     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1396     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1397     xb   = (Mat_SeqBAIJ*)xx->B->data;
1398     yb   = (Mat_SeqBAIJ*)yy->B->data;
1399     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1400     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1401   } else {
1402     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1403     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1404     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1405   }
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1411 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1412 {
1413   PetscErrorCode ierr;
1414   PetscInt       i;
1415   PetscBool      flg;
1416 
1417   PetscFunctionBegin;
1418   for (i=0; i<n; i++) {
1419     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1420     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1421   }
1422   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 
1427 /* -------------------------------------------------------------------*/
1428 static struct _MatOps MatOps_Values = {
1429         MatSetValues_MPISBAIJ,
1430         MatGetRow_MPISBAIJ,
1431         MatRestoreRow_MPISBAIJ,
1432         MatMult_MPISBAIJ,
1433 /*  4*/ MatMultAdd_MPISBAIJ,
1434         MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1435         MatMultAdd_MPISBAIJ,
1436         0,
1437         0,
1438         0,
1439 /* 10*/ 0,
1440         0,
1441         0,
1442         MatSOR_MPISBAIJ,
1443         MatTranspose_MPISBAIJ,
1444 /* 15*/ MatGetInfo_MPISBAIJ,
1445         MatEqual_MPISBAIJ,
1446         MatGetDiagonal_MPISBAIJ,
1447         MatDiagonalScale_MPISBAIJ,
1448         MatNorm_MPISBAIJ,
1449 /* 20*/ MatAssemblyBegin_MPISBAIJ,
1450         MatAssemblyEnd_MPISBAIJ,
1451         MatSetOption_MPISBAIJ,
1452         MatZeroEntries_MPISBAIJ,
1453 /* 24*/ 0,
1454         0,
1455         0,
1456         0,
1457         0,
1458 /* 29*/ MatSetUp_MPISBAIJ,
1459         0,
1460         0,
1461         0,
1462         0,
1463 /* 34*/ MatDuplicate_MPISBAIJ,
1464         0,
1465         0,
1466         0,
1467         0,
1468 /* 39*/ MatAXPY_MPISBAIJ,
1469         MatGetSubMatrices_MPISBAIJ,
1470         MatIncreaseOverlap_MPISBAIJ,
1471         MatGetValues_MPISBAIJ,
1472         MatCopy_MPISBAIJ,
1473 /* 44*/ 0,
1474         MatScale_MPISBAIJ,
1475         0,
1476         0,
1477         0,
1478 /* 49*/ 0,
1479         0,
1480         0,
1481         0,
1482         0,
1483 /* 54*/ 0,
1484         0,
1485         MatSetUnfactored_MPISBAIJ,
1486         0,
1487         MatSetValuesBlocked_MPISBAIJ,
1488 /* 59*/ 0,
1489         0,
1490         0,
1491         0,
1492         0,
1493 /* 64*/ 0,
1494         0,
1495         0,
1496         0,
1497         0,
1498 /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1499         0,
1500         0,
1501         0,
1502         0,
1503 /* 74*/ 0,
1504         0,
1505         0,
1506         0,
1507         0,
1508 /* 79*/ 0,
1509         0,
1510         0,
1511         0,
1512         MatLoad_MPISBAIJ,
1513 /* 84*/ 0,
1514         0,
1515         0,
1516         0,
1517         0,
1518 /* 89*/ 0,
1519         0,
1520         0,
1521         0,
1522         0,
1523 /* 94*/ 0,
1524         0,
1525         0,
1526         0,
1527         0,
1528 /* 99*/ 0,
1529         0,
1530         0,
1531         0,
1532         0,
1533 /*104*/ 0,
1534         MatRealPart_MPISBAIJ,
1535         MatImaginaryPart_MPISBAIJ,
1536         MatGetRowUpperTriangular_MPISBAIJ,
1537         MatRestoreRowUpperTriangular_MPISBAIJ,
1538 /*109*/ 0,
1539         0,
1540         0,
1541         0,
1542         0,
1543 /*114*/ 0,
1544         0,
1545         0,
1546         0,
1547         0,
1548 /*119*/ 0,
1549         0,
1550         0,
1551         0
1552 };
1553 
1554 
1555 EXTERN_C_BEGIN
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1558 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1559 {
1560   PetscFunctionBegin;
1561   *a = ((Mat_MPISBAIJ*)A->data)->A;
1562   PetscFunctionReturn(0);
1563 }
1564 EXTERN_C_END
1565 
1566 EXTERN_C_BEGIN
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1569 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1570 {
1571   Mat_MPISBAIJ   *b;
1572   PetscErrorCode ierr;
1573   PetscInt       i,mbs,Mbs;
1574 
1575   PetscFunctionBegin;
1576   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1577   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1578   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1579   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1580 
1581   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1582   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1583   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1584   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1585   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1586 
1587   if (d_nnz) {
1588     for (i=0; i<B->rmap->n/bs; i++) {
1589       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1590     }
1591   }
1592   if (o_nnz) {
1593     for (i=0; i<B->rmap->n/bs; i++) {
1594       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1595     }
1596   }
1597 
1598   b   = (Mat_MPISBAIJ*)B->data;
1599   mbs = B->rmap->n/bs;
1600   Mbs = B->rmap->N/bs;
1601   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1602 
1603   B->rmap->bs = bs;
1604   b->bs2      = bs*bs;
1605   b->mbs      = mbs;
1606   b->nbs      = mbs;
1607   b->Mbs      = Mbs;
1608   b->Nbs      = Mbs;
1609 
1610   for (i=0; i<=b->size; i++) {
1611     b->rangebs[i] = B->rmap->range[i]/bs;
1612   }
1613   b->rstartbs = B->rmap->rstart/bs;
1614   b->rendbs   = B->rmap->rend/bs;
1615 
1616   b->cstartbs = B->cmap->rstart/bs;
1617   b->cendbs   = B->cmap->rend/bs;
1618 
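       /* On the first preallocation call, create the sequential diagonal block b->A (SeqSBAIJ,
          upper triangle only), the sequential off-diagonal block b->B (SeqBAIJ), and the block
          stash used to hold blocked entries destined for other processes */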
1619   if (!B->preallocated) {
1620     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1621     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1622     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1623     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1624     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1625     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1626     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1627     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1628     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1629   }
1630 
1631   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1632   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1633 
1634   B->preallocated = PETSC_TRUE;
1635   PetscFunctionReturn(0);
1636 }
1637 EXTERN_C_END
1638 
1639 EXTERN_C_BEGIN
1640 #undef __FUNCT__
1641 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1642 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1643 {
1644   PetscInt       m,rstart,cstart,cend;
1645   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1646   const PetscInt *JJ    =0;
1647   PetscScalar    *values=0;
1648   PetscErrorCode ierr;
1649 
1650   PetscFunctionBegin;
1651   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1652   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1653   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1654   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1655   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1656   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1657   m      = B->rmap->n/bs;
1658   rstart = B->rmap->rstart/bs;
1659   cstart = B->cmap->rstart/bs;
1660   cend   = B->cmap->rend/bs;
1661 
1662   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1663   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
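       /* For each local block row, count the block columns that fall inside the diagonal block
          column range [cstart,cend) (d_nnz) and those that fall outside it (o_nnz) */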
1664   for (i=0; i<m; i++) {
1665     nz = ii[i+1] - ii[i];
1666     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1667     nz_max = PetscMax(nz_max,nz);
1668     JJ     = jj + ii[i];
1669     for (j=0; j<nz; j++) {
1670       if (*JJ >= cstart) break;
1671       JJ++;
1672     }
1673     d = 0;
1674     for (; j<nz; j++) {
1675       if (*JJ++ >= cend) break;
1676       d++;
1677     }
1678     d_nnz[i] = d;
1679     o_nnz[i] = nz - d;
1680   }
1681   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1682   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1683 
1684   values = (PetscScalar*)V;
1685   if (!values) {
1686     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1687     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1688   }
1689   for (i=0; i<m; i++) {
1690     PetscInt          row    = i + rstart;
1691     PetscInt          ncols  = ii[i+1] - ii[i];
1692     const PetscInt    *icols = jj + ii[i];
1693     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1694     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1695   }
1696 
1697   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1698   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1699   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 EXTERN_C_END
1704 
1705 EXTERN_C_BEGIN
1706 #if defined(PETSC_HAVE_MUMPS)
1707 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1708 #endif
1709 #if defined(PETSC_HAVE_PASTIX)
1710 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1711 #endif
1712 EXTERN_C_END
1713 
1714 /*MC
1715    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1716    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1717    the matrix is stored.
1718 
1719   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1720   can call MatSetOption(Mat, MAT_HERMITIAN, PETSC_TRUE);
1721 
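       A minimal sketch, assuming A is an already assembled MATMPISBAIJ matrix created with complex scalars:
     .vb
        ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
     .ve
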
1722    Options Database Keys:
1723 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1724 
1725   Level: beginner
1726 
1727 .seealso: MatCreateMPISBAIJ
1728 M*/
1729 
1730 EXTERN_C_BEGIN
1731 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1732 EXTERN_C_END
1733 
1734 EXTERN_C_BEGIN
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatCreate_MPISBAIJ"
1737 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1738 {
1739   Mat_MPISBAIJ   *b;
1740   PetscErrorCode ierr;
1741   PetscBool      flg;
1742 
1743   PetscFunctionBegin;
1744   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1745   B->data = (void*)b;
1746   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1747 
1748   B->ops->destroy = MatDestroy_MPISBAIJ;
1749   B->ops->view    = MatView_MPISBAIJ;
1750   B->assembled    = PETSC_FALSE;
1751   B->insertmode   = NOT_SET_VALUES;
1752 
1753   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1754   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1755 
1756   /* build local table of row and column ownerships */
1757   ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1758 
1759   /* build cache for off array entries formed */
1760   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1761 
1762   b->donotstash  = PETSC_FALSE;
1763   b->colmap      = NULL;
1764   b->garray      = NULL;
1765   b->roworiented = PETSC_TRUE;
1766 
1767   /* stuff used in block assembly */
1768   b->barray = 0;
1769 
1770   /* stuff used for matrix vector multiply */
1771   b->lvec    = 0;
1772   b->Mvctx   = 0;
1773   b->slvec0  = 0;
1774   b->slvec0b = 0;
1775   b->slvec1  = 0;
1776   b->slvec1a = 0;
1777   b->slvec1b = 0;
1778   b->sMvctx  = 0;
1779 
1780   /* stuff for MatGetRow() */
1781   b->rowindices   = 0;
1782   b->rowvalues    = 0;
1783   b->getrowactive = PETSC_FALSE;
1784 
1785   /* hash table stuff */
1786   b->ht           = 0;
1787   b->hd           = 0;
1788   b->ht_size      = 0;
1789   b->ht_flag      = PETSC_FALSE;
1790   b->ht_fact      = 0;
1791   b->ht_total_ct  = 0;
1792   b->ht_insert_ct = 0;
1793 
1794   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1795   b->ijonly = PETSC_FALSE;
1796 
1797   b->in_loc = 0;
1798   b->v_loc  = 0;
1799   b->n_loc  = 0;
1800   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1801   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
1802   if (flg) {
1803     PetscReal fact = 1.39;
1804     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1805     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1806     if (fact <= 1.0) fact = 1.39;
1807     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1808     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1809   }
1810   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1811 
1812 #if defined(PETSC_HAVE_PASTIX)
1813   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1814                                            "MatGetFactor_mpisbaij_pastix",
1815                                            MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1816 #endif
1817 #if defined(PETSC_HAVE_MUMPS)
1818   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1819                                            "MatGetFactor_sbaij_mumps",
1820                                            MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1821 #endif
1822   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1823                                            "MatStoreValues_MPISBAIJ",
1824                                            MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1825   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1826                                            "MatRetrieveValues_MPISBAIJ",
1827                                            MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1828   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1829                                            "MatGetDiagonalBlock_MPISBAIJ",
1830                                            MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1831   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1832                                            "MatMPISBAIJSetPreallocation_MPISBAIJ",
1833                                            MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1834   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1835                                            "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1836                                            MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1837   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",
1838                                            "MatConvert_MPISBAIJ_MPISBSTRM",
1839                                            MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1840 
1841   B->symmetric                  = PETSC_TRUE;
1842   B->structurally_symmetric     = PETSC_TRUE;
1843   B->symmetric_set              = PETSC_TRUE;
1844   B->structurally_symmetric_set = PETSC_TRUE;
1845 
1846   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1847   PetscFunctionReturn(0);
1848 }
1849 EXTERN_C_END
1850 
1851 /*MC
1852    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1853 
1854    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1855    and MATMPISBAIJ otherwise.
1856 
1857    Options Database Keys:
1858 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1859 
1860   Level: beginner
1861 
1862 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1863 M*/
1864 
1865 #undef __FUNCT__
1866 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1867 /*@C
1868    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1869    the user should preallocate the matrix storage by setting the parameters
1870    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1871    performance can be increased by more than a factor of 50.
1872 
1873    Collective on Mat
1874 
1875    Input Parameters:
1876 +  A - the matrix
1877 .  bs   - size of block
1878 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1879            submatrix  (same for all local rows)
1880 .  d_nnz - array containing the number of block nonzeros in the various block rows
1881            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1882            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1883            for the diagonal entry and set a value even if it is zero.
1884 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1885            submatrix (same for all local rows).
1886 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1887            off-diagonal portion of the local submatrix that is right of the diagonal
1888            (possibly different for each block row) or NULL.
1889 
1890 
1891    Options Database Keys:
1892 +   -mat_no_unroll - uses code that does not unroll the loops in the
1893                      block calculations (much slower)
1894 -   -mat_block_size - size of the blocks to use
1895 
1896    Notes:
1897 
1898    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1899    then it must be used on all processors that share the object for that argument.
1900 
1901    If the *_nnz parameter is given then the *_nz parameter is ignored
1902 
1903    Storage Information:
1904    For a square global matrix we define each processor's diagonal portion
1905    to be its local rows and the corresponding columns (a square submatrix);
1906    each processor's off-diagonal portion encompasses the remainder of the
1907    local matrix (a rectangular submatrix).
1908 
1909    The user can specify preallocated storage for the diagonal part of
1910    the local submatrix with either d_nz or d_nnz (not both).  Set
1911    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1912    memory allocation.  Likewise, specify preallocated storage for the
1913    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1914 
1915    You can call MatGetInfo() to get information on how effective the preallocation was;
1916    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1917    You can also run with the option -info and look for messages with the string
1918    malloc in them to see if additional memory allocation was needed.
1919 
1920    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1921    the figure below we depict these three local rows and all columns (0-11).
1922 
1923 .vb
1924            0 1 2 3 4 5 6 7 8 9 10 11
1925           -------------------
1926    row 3  |  . . . d d d o o o o o o
1927    row 4  |  . . . d d d o o o o o o
1928    row 5  |  . . . d d d o o o o o o
1929           -------------------
1930 .ve
1931 
1932    Thus, any entries in the d locations are stored in the d (diagonal)
1933    submatrix, and any entries in the o locations are stored in the
1934    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1935    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1936 
1937    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1938    plus the diagonal part of the d matrix,
1939    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1940 
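        For instance, if every d and o location depicted above were nonzero and bs=1, the upper
        triangular convention would give d_nnz = {3,2,1} for rows 3, 4 and 5, and o_nnz = {6,6,6}.
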
1941    In general, for PDE problems in which most nonzeros are near the diagonal,
1942    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1943    or you will get TERRIBLE performance; see the users' manual chapter on
1944    matrices.
1945 
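        A typical call sequence is sketched below; the matrix sizes and the preallocation values 3 and 1
        are placeholders to be replaced by values appropriate for the actual problem:
     .vb
        Mat A;
        ierr = MatCreate(comm,&A);CHKERRQ(ierr);
        ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
        ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
        ierr = MatSetFromOptions(A);CHKERRQ(ierr);
        ierr = MatMPISBAIJSetPreallocation(A,bs,3,NULL,1,NULL);CHKERRQ(ierr);
     .ve
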
1946    Level: intermediate
1947 
1948 .keywords: matrix, block, aij, compressed row, sparse, parallel
1949 
1950 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1951 @*/
1952 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1953 {
1954   PetscErrorCode ierr;
1955 
1956   PetscFunctionBegin;
1957   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1958   PetscValidType(B,1);
1959   PetscValidLogicalCollectiveInt(B,bs,2);
1960   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatCreateSBAIJ"
1966 /*@C
1967    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1968    (block compressed row).  For good matrix assembly performance
1969    the user should preallocate the matrix storage by setting the parameters
1970    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1971    performance can be increased by more than a factor of 50.
1972 
1973    Collective on MPI_Comm
1974 
1975    Input Parameters:
1976 +  comm - MPI communicator
1977 .  bs   - size of block
1978 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1979            This value should be the same as the local size used in creating the
1980            y vector for the matrix-vector product y = Ax.
1981 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1982            This value should be the same as the local size used in creating the
1983            x vector for the matrix-vector product y = Ax.
1984 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1985 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1986 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1987            submatrix  (same for all local rows)
1988 .  d_nnz - array containing the number of block nonzeros in the various block rows
1989            in the upper triangular portion of the diagonal part of the local submatrix
1990            (possibly different for each block row) or NULL.
1991            If you plan to factor the matrix you must leave room for the diagonal entry and
1992            set its value even if it is zero.
1993 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1994            submatrix (same for all local rows).
1995 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1996            off-diagonal portion of the local submatrix (possibly different for
1997            each block row) or NULL.
1998 
1999    Output Parameter:
2000 .  A - the matrix
2001 
2002    Options Database Keys:
2003 +   -mat_no_unroll - uses code that does not unroll the loops in the
2004                      block calculations (much slower)
2005 .   -mat_block_size - size of the blocks to use
2006 -   -mat_mpi - use the parallel matrix data structures even on one processor
2007                (defaults to using SeqSBAIJ format on one processor)
2008 
2009    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2010    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2011    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2012 
2013    Notes:
2014    The number of rows and columns must be divisible by blocksize.
2015    This matrix type does not support complex Hermitian operation.
2016 
2017    The user MUST specify either the local or global matrix dimensions
2018    (possibly both).
2019 
2020    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2021    then it must be used on all processors that share the object for that argument.
2022 
2023    If the *_nnz parameter is given then the *_nz parameter is ignored
2024 
2025    Storage Information:
2026    For a square global matrix we define each processor's diagonal portion
2027    to be its local rows and the corresponding columns (a square submatrix);
2028    each processor's off-diagonal portion encompasses the remainder of the
2029    local matrix (a rectangular submatrix).
2030 
2031    The user can specify preallocated storage for the diagonal part of
2032    the local submatrix with either d_nz or d_nnz (not both).  Set
2033    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2034    memory allocation.  Likewise, specify preallocated storage for the
2035    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2036 
2037    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2038    the figure below we depict these three local rows and all columns (0-11).
2039 
2040 .vb
2041            0 1 2 3 4 5 6 7 8 9 10 11
2042           -------------------
2043    row 3  |  . . . d d d o o o o o o
2044    row 4  |  . . . d d d o o o o o o
2045    row 5  |  . . . d d d o o o o o o
2046           -------------------
2047 .ve
2048 
2049    Thus, any entries in the d locations are stored in the d (diagonal)
2050    submatrix, and any entries in the o locations are stored in the
2051    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2052    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2053 
2054    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2055    plus the diagonal part of the d matrix,
2056    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2057    In general, for PDE problems in which most nonzeros are near the diagonal,
2058    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2059    or you will get TERRIBLE performance; see the users' manual chapter on
2060    matrices.
2061 
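        Sample usage, as a sketch with block size 2 (assuming the global size M is divisible by the
        block size; the preallocation values 3 and 1 are illustrative only):
     .vb
        Mat A;
        ierr = MatCreateSBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,M,3,NULL,1,NULL,&A);CHKERRQ(ierr);
     .ve
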
2062    Level: intermediate
2063 
2064 .keywords: matrix, block, aij, compressed row, sparse, parallel
2065 
2066 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2067 @*/
2068 
2069 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2070 {
2071   PetscErrorCode ierr;
2072   PetscMPIInt    size;
2073 
2074   PetscFunctionBegin;
2075   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2076   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2077   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2078   if (size > 1) {
2079     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2080     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2081   } else {
2082     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2083     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2084   }
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 
2089 #undef __FUNCT__
2090 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2091 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2092 {
2093   Mat            mat;
2094   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2095   PetscErrorCode ierr;
2096   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2097   PetscScalar    *array;
2098 
2099   PetscFunctionBegin;
2100   *newmat = 0;
2101 
2102   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2103   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2104   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2105   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2106   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2107   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2108 
2109   mat->factortype   = matin->factortype;
2110   mat->preallocated = PETSC_TRUE;
2111   mat->assembled    = PETSC_TRUE;
2112   mat->insertmode   = NOT_SET_VALUES;
2113 
2114   a      = (Mat_MPISBAIJ*)mat->data;
2115   a->bs2 = oldmat->bs2;
2116   a->mbs = oldmat->mbs;
2117   a->nbs = oldmat->nbs;
2118   a->Mbs = oldmat->Mbs;
2119   a->Nbs = oldmat->Nbs;
2120 
2121 
2122   a->size         = oldmat->size;
2123   a->rank         = oldmat->rank;
2124   a->donotstash   = oldmat->donotstash;
2125   a->roworiented  = oldmat->roworiented;
2126   a->rowindices   = 0;
2127   a->rowvalues    = 0;
2128   a->getrowactive = PETSC_FALSE;
2129   a->barray       = 0;
2130   a->rstartbs     = oldmat->rstartbs;
2131   a->rendbs       = oldmat->rendbs;
2132   a->cstartbs     = oldmat->cstartbs;
2133   a->cendbs       = oldmat->cendbs;
2134 
2135   /* hash table stuff */
2136   a->ht           = 0;
2137   a->hd           = 0;
2138   a->ht_size      = 0;
2139   a->ht_flag      = oldmat->ht_flag;
2140   a->ht_fact      = oldmat->ht_fact;
2141   a->ht_total_ct  = 0;
2142   a->ht_insert_ct = 0;
2143 
2144   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2145   if (oldmat->colmap) {
2146 #if defined(PETSC_USE_CTABLE)
2147     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2148 #else
2149     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2150     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2151     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2152 #endif
2153   } else a->colmap = 0;
2154 
2155   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2156     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2157     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2158     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2159   } else a->garray = 0;
2160 
2161   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2162   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2163   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2164   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2165   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2166 
2167   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2168   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2169   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2170   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2171 
2172   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2173   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2174   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2175   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2176   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2177   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2178   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2179   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2180   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2181   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2182   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2183   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2184   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2185 
2186   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2187   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2188   a->sMvctx = oldmat->sMvctx;
2189   ierr      = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2190 
2191   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2192   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2193   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2194   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2195   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2196   *newmat = mat;
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatLoad_MPISBAIJ"
2202 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2203 {
2204   PetscErrorCode ierr;
2205   PetscInt       i,nz,j,rstart,rend;
2206   PetscScalar    *vals,*buf;
2207   MPI_Comm       comm;
2208   MPI_Status     status;
2209   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2210   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2211   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2212   PetscInt       bs       =1,Mbs,mbs,extra_rows;
2213   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2214   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2215   int            fd;
2216 
2217   PetscFunctionBegin;
2218   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2219   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2220   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2221   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2222 
2223   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2224   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2225   if (!rank) {
2226     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2227     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2228     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2229     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2230   }
2231 
2232   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2233 
2234   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2235   M    = header[1];
2236   N    = header[2];
2237 
2238   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2239   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2240   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2241 
2242   /* If global sizes are set, check if they are consistent with that given in the file */
2243   if (sizesset) {
2244     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2245   }
2246   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2247   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2248 
2249   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2250 
2251   /*
2252      This code adds extra rows to make sure the number of rows is
2253      divisible by the blocksize
2254   */
2255   Mbs        = M/bs;
2256   extra_rows = bs - M + bs*(Mbs);
2257   if (extra_rows == bs) extra_rows = 0;
2258   else                  Mbs++;
2259   if (extra_rows && !rank) {
2260     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2261   }
2262 
2263   /* determine ownership of all rows */
2264   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2265     mbs = Mbs/size + ((Mbs % size) > rank);
2266     m   = mbs*bs;
2267   } else { /* User Set */
2268     m   = newmat->rmap->n;
2269     mbs = m/bs;
2270   }
2271   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2272   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2273   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2274   rowners[0] = 0;
2275   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2276   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2277   rstart = rowners[rank];
2278   rend   = rowners[rank+1];
2279 
2280   /* distribute row lengths to all processors */
2281   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2282   if (!rank) {
2283     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2284     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2285     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2286     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2287     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2288     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2289     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2290   } else {
2291     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2292   }
2293 
2294   if (!rank) {   /* procs[0] */
2295     /* calculate the number of nonzeros on each processor */
2296     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2297     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2298     for (i=0; i<size; i++) {
2299       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2300         procsnz[i] += rowlengths[j];
2301       }
2302     }
2303     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2304 
2305     /* determine max buffer needed and allocate it */
2306     maxnz = 0;
2307     for (i=0; i<size; i++) {
2308       maxnz = PetscMax(maxnz,procsnz[i]);
2309     }
2310     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2311 
2312     /* read in my part of the matrix column indices  */
2313     nz     = procsnz[0];
2314     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2315     mycols = ibuf;
2316     if (size == 1) nz -= extra_rows;
2317     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2318     if (size == 1) {
2319       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2320     }
2321 
2322     /* read in every ones (except the last) and ship off */
2323     for (i=1; i<size-1; i++) {
2324       nz   = procsnz[i];
2325       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2326       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2327     }
2328     /* read in the stuff for the last proc */
2329     if (size != 1) {
2330       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2331       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2332       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2333       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2334     }
2335     ierr = PetscFree(cols);CHKERRQ(ierr);
2336   } else {  /* procs[i], i>0 */
2337     /* determine buffer space needed for message */
2338     nz = 0;
2339     for (i=0; i<m; i++) nz += locrowlens[i];
2340     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2341     mycols = ibuf;
2342     /* receive message of column indices*/
2343     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2344     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2345     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2346   }
2347 
2348   /* loop over local rows, determining number of off diagonal entries */
2349   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2350   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2351   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2352   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2353   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2354   rowcount = 0;
2355   nzcount  = 0;
2356   for (i=0; i<mbs; i++) {
2357     dcount  = 0;
2358     odcount = 0;
2359     for (j=0; j<bs; j++) {
2360       kmax = locrowlens[rowcount];
2361       for (k=0; k<kmax; k++) {
2362         tmp = mycols[nzcount++]/bs; /* block col. index */
2363         if (!mask[tmp]) {
2364           mask[tmp] = 1;
2365           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2366           else masked1[dcount++] = tmp; /* entry in diag portion */
2367         }
2368       }
2369       rowcount++;
2370     }
2371 
2372     dlens[i]  = dcount;  /* d_nzz[i] */
2373     odlens[i] = odcount; /* o_nzz[i] */
2374 
2375     /* zero out the mask elements we set */
2376     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2377     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2378   }
2379   if (!sizesset) {
2380     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2381   }
2382   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2383   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2384 
2385   if (!rank) {
2386     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2387     /* read in my part of the matrix numerical values  */
2388     nz     = procsnz[0];
2389     vals   = buf;
2390     mycols = ibuf;
2391     if (size == 1) nz -= extra_rows;
2392     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2393     if (size == 1) {
2394       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2395     }
2396 
2397     /* insert into matrix */
2398     jj = rstart*bs;
2399     for (i=0; i<m; i++) {
2400       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2401       mycols += locrowlens[i];
2402       vals   += locrowlens[i];
2403       jj++;
2404     }
2405 
2406     /* read in other processors (except the last one) and ship out */
2407     for (i=1; i<size-1; i++) {
2408       nz   = procsnz[i];
2409       vals = buf;
2410       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2411       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2412     }
2413     /* the last proc */
2414     if (size != 1) {
2415       nz   = procsnz[size-1] - extra_rows;
2416       vals = buf;
2417       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2418       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2419       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2420     }
2421     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2422 
2423   } else {
2424     /* receive numeric values */
2425     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2426 
2427     /* receive message of values*/
2428     vals   = buf;
2429     mycols = ibuf;
2430     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2431     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2432     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2433 
2434     /* insert into matrix */
2435     jj = rstart*bs;
2436     for (i=0; i<m; i++) {
2437       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2438       mycols += locrowlens[i];
2439       vals   += locrowlens[i];
2440       jj++;
2441     }
2442   }
2443 
2444   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2445   ierr = PetscFree(buf);CHKERRQ(ierr);
2446   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2447   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2448   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2449   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2450   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2451   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 #undef __FUNCT__
2456 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2457 /*XXXXX@
2458    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2459 
2460    Input Parameters:
2461 .  mat  - the matrix
2462 .  fact - factor
2463 
2464    Not Collective on Mat, each process can have a different hash factor
2465 
2466    Level: advanced
2467 
2468   Notes:
2469    This can also be set by the command line option: -mat_use_hash_table fact
2470 
2471 .keywords: matrix, hashtable, factor, HT
2472 
2473 .seealso: MatSetOption()
2474 @XXXXX*/
2475 
2476 
2477 #undef __FUNCT__
2478 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2479 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2480 {
2481   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2482   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2483   PetscReal      atmp;
2484   PetscReal      *work,*svalues,*rvalues;
2485   PetscErrorCode ierr;
2486   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2487   PetscMPIInt    rank,size;
2488   PetscInt       *rowners_bs,dest,count,source;
2489   PetscScalar    *va;
2490   MatScalar      *ba;
2491   MPI_Status     stat;
2492 
2493   PetscFunctionBegin;
2494   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2495   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2496   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2497 
2498   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2499   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2500 
2501   bs  = A->rmap->bs;
2502   mbs = a->mbs;
2503   Mbs = a->Mbs;
2504   ba  = b->a;
2505   bi  = b->i;
2506   bj  = b->j;
2507 
2508   /* find ownerships */
2509   rowners_bs = A->rmap->range;
2510 
2511   /* each proc creates an array to be distributed */
2512   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2513   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2514 
2515   /* row_max for B */
2516   if (rank != size-1) {
2517     for (i=0; i<mbs; i++) {
2518       ncols = bi[1] - bi[0]; bi++;
2519       brow  = bs*i;
2520       for (j=0; j<ncols; j++) {
2521         bcol = bs*(*bj);
2522         for (kcol=0; kcol<bs; kcol++) {
2523           col  = bcol + kcol;                /* local col index */
2524           col += rowners_bs[rank+1];      /* global col index */
2525           for (krow=0; krow<bs; krow++) {
2526             atmp = PetscAbsScalar(*ba); ba++;
2527             row  = brow + krow;   /* local row index */
2528             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2529             if (work[col] < atmp) work[col] = atmp;
2530           }
2531         }
2532         bj++;
2533       }
2534     }
2535 
2536     /* send values to its owners */
2537     for (dest=rank+1; dest<size; dest++) {
2538       svalues = work + rowners_bs[dest];
2539       count   = rowners_bs[dest+1]-rowners_bs[dest];
2540       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2541     }
2542   }
2543 
2544   /* receive values */
2545   if (rank) {
2546     rvalues = work;
2547     count   = rowners_bs[rank+1]-rowners_bs[rank];
2548     for (source=0; source<rank; source++) {
2549       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2550       /* process values */
2551       for (i=0; i<count; i++) {
2552         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2553       }
2554     }
2555   }
2556 
2557   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2558   ierr = PetscFree(work);CHKERRQ(ierr);
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 #undef __FUNCT__
2563 #define __FUNCT__ "MatSOR_MPISBAIJ"
2564 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2565 {
2566   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2567   PetscErrorCode    ierr;
2568   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2569   PetscScalar       *x,*ptr,*from;
2570   Vec               bb1;
2571   const PetscScalar *b;
2572 
2573   PetscFunctionBegin;
2574   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2575   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2576 
2577   if (flag == SOR_APPLY_UPPER) {
2578     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2579     PetscFunctionReturn(0);
2580   }
2581 
2582   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2583     if (flag & SOR_ZERO_INITIAL_GUESS) {
2584       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2585       its--;
2586     }
2587 
2588     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2589     while (its--) {
2590 
2591       /* lower triangular part: slvec0b = - B^T*xx */
2592       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2593 
2594       /* copy xx into slvec0a */
2595       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2596       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2597       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2598       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2599 
2600       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2601 
2602       /* copy bb into slvec1a */
2603       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2604       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2605       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2606       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2607 
2608       /* set slvec1b = 0 */
2609       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2610 
2611       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2612       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2613       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2614       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2615 
2616       /* upper triangular part: bb1 = bb1 - B*x */
2617       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2618 
2619       /* local diagonal sweep */
2620       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2621     }
2622     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2623   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2624     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2625   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2626     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2627   } else if (flag & SOR_EISENSTAT) {
2628     Vec               xx1;
2629     PetscBool         hasop;
2630     const PetscScalar *diag;
2631     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2632     PetscInt          i,n;
2633 
2634     if (!mat->xx1) {
2635       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2636       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2637     }
2638     xx1 = mat->xx1;
2639     bb1 = mat->bb1;
2640 
2641     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2642 
2643     if (!mat->diag) {
2644       /* this is wrong for same matrix with new nonzero values */
2645       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2646       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2647     }
2648     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2649 
2650     if (hasop) {
2651       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2652       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2653     } else {
2654       /*
2655           These two lines are replaced by code that may be a bit faster for a good compiler
2656       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2657       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2658       */
2659       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2660       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2661       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2662       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2663       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2664       if (omega == 1.0) {
2665         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2666         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2667       } else {
2668         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2669         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2670       }
2671       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2672       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2673       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2674       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2675     }
2676 
2677     /* multiply off-diagonal portion of matrix */
2678     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2679     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2680     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2681     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2682     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2683     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2684     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2685     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2686     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2687     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2688 
2689     /* local sweep */
2690     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2691     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2692   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2693   PetscFunctionReturn(0);
2694 }
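/*
   Usage sketch (illustrative only, not compiled as part of this file): the local
   symmetric sweep implemented above is reached through the public MatSOR()
   interface.  Here A is assumed to be an assembled MATMPISBAIJ matrix with block
   size 1, and x, b are conforming vectors (e.g. obtained with MatGetVecs()); the
   call below uses omega = 1.0, fshift = 0.0, its = 2 and lits = 1.

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);

   In practice this path is normally exercised through PCSOR, for example with the
   runtime options -pc_type sor -pc_sor_local_symmetric.
*/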
2695 
2696 #undef __FUNCT__
2697 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2698 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2699 {
2700   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2701   PetscErrorCode ierr;
2702   Vec            lvec1,bb1;
2703 
2704   PetscFunctionBegin;
2705   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2706   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2707 
2708   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2709     if (flag & SOR_ZERO_INITIAL_GUESS) {
2710       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2711       its--;
2712     }
2713 
2714     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2715     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2716     while (its--) {
2717       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2718 
2719       /* lower triangular part: bb1 = bb - B^T*xx */
2720       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2721       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2722 
2723       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2724       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2725       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2726 
2727       /* upper triangular part: bb1 = bb1 - B*x */
2728       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2729       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2730 
2731       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2732 
2733       /* diagonal sweep */
2734       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2735     }
2736     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2737     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2738   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2739   PetscFunctionReturn(0);
2740 }
2741 
2742 #undef __FUNCT__
2743 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2744 /*@
2745      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain
2746          the local rows in standard CSR format.
2747 
2748    Collective on MPI_Comm
2749 
2750    Input Parameters:
2751 +  comm - MPI communicator
2752 .  bs - the block size, only a block size of 1 is supported
2753 .  m - number of local rows (cannot be PETSC_DECIDE)
2754 .  n - This value should be the same as the local size used in creating the
2755        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
2756        it calculated if N is given). For square matrices n is almost always m.
2757 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2758 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2759 .   i - row indices
2760 .   j - column indices
2761 -   a - matrix values
2762 
2763    Output Parameter:
2764 .   mat - the matrix
2765 
2766    Level: intermediate
2767 
2768    Notes:
2769        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2770      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2771      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2772 
2773        The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.
2774 
2775 .keywords: matrix, sbaij, compressed row, sparse, parallel
2776 
2777 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2778           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2779 @*/
2780 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2781 {
2782   PetscErrorCode ierr;
2783 
2784 
2785   PetscFunctionBegin;
2786   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2787   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
2788   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2789   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2790   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2791   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2792   PetscFunctionReturn(0);
2793 }
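/*
   Usage sketch (illustrative only, not compiled as part of this file): building the
   4x4 symmetric tridiagonal matrix

        2 -1  0  0
       -1  2 -1  0
        0 -1  2 -1
        0  0 -1  2

   on a single process with bs = 1.  Only the upper triangular part of each row
   (here the diagonal and superdiagonal) is passed, matching what the symmetric
   SBAIJ format stores; row and column indices are 0 based.

     Mat         C;
     PetscInt    i[5] = {0,2,4,6,7};
     PetscInt    j[7] = {0,1, 1,2, 2,3, 3};
     PetscScalar v[7] = {2,-1, 2,-1, 2,-1, 2};

     ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,4,4,PETSC_DETERMINE,PETSC_DETERMINE,i,j,v,&C);CHKERRQ(ierr);
     ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/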
2794 
2795 
2796 #undef __FUNCT__
2797 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2798 /*@C
2799    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
2800    (symmetric block compressed row storage).
2801 
2802    Collective on MPI_Comm
2803 
2804    Input Parameters:
2805 +  A - the matrix
2806 .  bs - the block size
2807 .  i - the indices into j for the start of each local row (starts with zero)
2808 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2809 -  v - optional values in the matrix
2810 
2811    Level: developer
2812 
2813 .keywords: matrix, sbaij, compressed row, sparse, parallel
2814 
2815 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2816 @*/
2817 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2818 {
2819   PetscErrorCode ierr;
2820 
2821   PetscFunctionBegin;
2822   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2823   PetscFunctionReturn(0);
2824 }
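/*
   Usage sketch (illustrative only, not compiled as part of this file): the same 4x4
   tridiagonal matrix as in the MatCreateMPISBAIJWithArrays() sketch above, but set
   up explicitly on exactly two MPI processes, each owning two rows.  Column indices
   are global, bs = 1, and only upper triangular entries are supplied.

     Mat         B;
     PetscMPIInt rank;
     PetscInt    i0[3] = {0,2,4}, j0[4] = {0,1,1,2};
     PetscScalar v0[4] = {2,-1,2,-1};
     PetscInt    i1[3] = {0,2,3}, j1[3] = {2,3,3};
     PetscScalar v1[3] = {2,-1,2};

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,2,4,4);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     if (!rank) {
       ierr = MatMPISBAIJSetPreallocationCSR(B,1,i0,j0,v0);CHKERRQ(ierr);
     } else {
       ierr = MatMPISBAIJSetPreallocationCSR(B,1,i1,j1,v1);CHKERRQ(ierr);
     }
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/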
2825 
2826 
2827