xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision ce94432eddcd14845bc7e8083b7f8ea723b9bf7d)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55   { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60     bcol = col/bs; \
61     ridx = row % bs; cidx = col % bs; \
62     low  = 0; high = nrow; \
63     while (high-low > 3) { \
64       t = (low+high)/2; \
65       if (rp[t] > bcol) high = t; \
66       else              low  = t; \
67     } \
68     for (_i=low; _i<high; _i++) { \
69       if (rp[_i] > bcol) break; \
70       if (rp[_i] == bcol) { \
71         bap = ap + bs2*_i + bs*cidx + ridx; \
72         if (addv == ADD_VALUES) *bap += value;  \
73         else                    *bap  = value;  \
74         goto a_noinsert; \
75       } \
76     } \
77     if (a->nonew == 1) goto a_noinsert; \
78     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80     N = nrow++ - 1;  \
81     /* shift up all the later entries in this row */ \
82     for (ii=N; ii>=_i; ii--) { \
83       rp[ii+1] = rp[ii]; \
84       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85     } \
86     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87     rp[_i]                      = bcol;  \
88     ap[bs2*_i + bs*cidx + ridx] = value;  \
89 a_noinsert:; \
90     ailen[brow] = nrow; \
91   }
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94   { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98     bcol = col/bs; \
99     ridx = row % bs; cidx = col % bs; \
100     low  = 0; high = nrow; \
101     while (high-low > 3) { \
102       t = (low+high)/2; \
103       if (rp[t] > bcol) high = t; \
104       else              low  = t; \
105     } \
106     for (_i=low; _i<high; _i++) { \
107       if (rp[_i] > bcol) break; \
108       if (rp[_i] == bcol) { \
109         bap = ap + bs2*_i + bs*cidx + ridx; \
110         if (addv == ADD_VALUES) *bap += value;  \
111         else                    *bap  = value;  \
112         goto b_noinsert; \
113       } \
114     } \
115     if (b->nonew == 1) goto b_noinsert; \
116     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118     N = nrow++ - 1;  \
119     /* shift up all the later entries in this row */ \
120     for (ii=N; ii>=_i; ii--) { \
121       rp[ii+1] = rp[ii]; \
122       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123     } \
124     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125     rp[_i]                      = bcol;  \
126     ap[bs2*_i + bs*cidx + ridx] = value;  \
127 b_noinsert:; \
128     bilen[brow] = nrow; \
129   }
130 
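/*
   Both macros above share one pattern: a short bisection narrows the search
   window over the sorted block-column indices, a linear scan finishes it, and
   on a miss the later entries are shifted up to open a slot.  A minimal
   standalone sketch of the search step (illustrative only; the names cols and
   ncols are hypothetical, not macro variables):
*/
#if 0
static PetscInt SBAIJLocateBlockColumn(const PetscInt *cols,PetscInt ncols,PetscInt bcol)
{
  PetscInt low = 0,high = ncols,t,i;
  while (high - low > 3) {          /* bisection, exactly as in the macros */
    t = (low + high)/2;
    if (cols[t] > bcol) high = t;
    else                low  = t;
  }
  for (i=low; i<high; i++) {        /* finish with a short linear scan */
    if (cols[i] >= bcol) break;
  }
  return i;  /* match if i < ncols && cols[i] == bcol, else the insertion point */
}
#endif
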
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by the user is ignored.
133 */
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some variables required by the MatSetValues_SeqSBAIJ_{A,B}_Private() macros */
148   Mat          A     = baij->A;
149   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar    *aa   =a->a;
152 
153   Mat         B     = baij->B;
154   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar   *ba   =b->a;
157 
158   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar *ap,*bap;
161 
162   /* for stash */
163   PetscInt  n_loc, *in_loc = NULL;
164   MatScalar *v_loc = NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash) {
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174 
175       baij->n_loc = n;
176     }
177     in_loc = baij->in_loc;
178     v_loc  = baij->v_loc;
179   }
180 
181   for (i=0; i<m; i++) {
182     if (im[i] < 0) continue;
183 #if defined(PETSC_USE_DEBUG)
184     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
185 #endif
186     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
187       row = im[i] - rstart_orig;              /* local row index */
188       for (j=0; j<n; j++) {
189         if (im[i]/bs > in[j]/bs) {
190           if (a->ignore_ltriangular) {
191             continue;    /* ignore lower triangular blocks */
192         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193         }
194         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
195           col  = in[j] - cstart_orig;         /* local col index */
196           brow = row/bs; bcol = col/bs;
197           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
198           if (roworiented) value = v[i*n+j];
199           else             value = v[i+j*m];
200           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
201           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
202         } else if (in[j] < 0) continue;
203 #if defined(PETSC_USE_DEBUG)
204         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
205 #endif
206         else {  /* off-diag entry (B) */
207           if (mat->was_assembled) {
208             if (!baij->colmap) {
209               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
210             }
211 #if defined(PETSC_USE_CTABLE)
212             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
213             col  = col - 1;
214 #else
215             col = baij->colmap[in[j]/bs] - 1;
216 #endif
217             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
218               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
219               col  =  in[j];
220               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
221               B    = baij->B;
222               b    = (Mat_SeqBAIJ*)(B)->data;
223               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
224               ba   = b->a;
225             } else col += in[j]%bs;
226           } else col = in[j];
227           if (roworiented) value = v[i*n+j];
228           else             value = v[i+j*m];
229           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
230           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
231         }
232       }
233     } else {  /* off processor entry */
234       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
235       if (!baij->donotstash) {
236         mat->assembled = PETSC_FALSE;
237         n_loc          = 0;
238         for (j=0; j<n; j++) {
239           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
240           in_loc[n_loc] = in[j];
241           if (roworiented) {
242             v_loc[n_loc] = v[i*n+j];
243           } else {
244             v_loc[n_loc] = v[j*m+i];
245           }
246           n_loc++;
247         }
248         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
249       }
250     }
251   }
252   PetscFunctionReturn(0);
253 }
254 
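/*
   Caller-side sketch (illustrative, not part of this file): only entries whose
   block row is <= their block column need to be supplied; enabling
   MAT_IGNORE_LOWER_TRIANGULAR makes stray lower-triangular input harmless
   instead of an error.  Assumes mat is a MATMPISBAIJ with block size 2.
*/
#if 0
  PetscInt    row = 4,col = 6;   /* block row 2 <= block col 3 for bs = 2 */
  PetscScalar v   = 1.0;
  ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetValues(mat,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif
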
255 #undef __FUNCT__
256 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
257 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
258 {
259   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
260   const MatScalar *value;
261   MatScalar       *barray     =baij->barray;
262   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
263   PetscErrorCode  ierr;
264   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
265   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
266   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
267 
268   PetscFunctionBegin;
269   if (!barray) {
270     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
271     baij->barray = barray;
272   }
273 
274   if (roworiented) {
275     stepval = (n-1)*bs;
276   } else {
277     stepval = (m-1)*bs;
278   }
279   for (i=0; i<m; i++) {
280     if (im[i] < 0) continue;
281 #if defined(PETSC_USE_DEBUG)
282     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
283 #endif
284     if (im[i] >= rstart && im[i] < rend) {
285       row = im[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (im[i] > in[j]) {
288           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
289           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
290         }
291         /* if only one block column (row-oriented) or one block row (column-oriented) is passed, no copy is required */
292         if ((roworiented) && (n == 1)) {
293           barray = (MatScalar*) v + i*bs2;
294         } else if ((!roworiented) && (m == 1)) {
295           barray = (MatScalar*) v + j*bs2;
296         } else { /* Here a copy is required */
297           if (roworiented) {
298             value = v + i*(stepval+bs)*bs + j*bs;
299           } else {
300             value = v + j*(stepval+bs)*bs + i*bs;
301           }
302           for (ii=0; ii<bs; ii++,value+=stepval) {
303             for (jj=0; jj<bs; jj++) {
304               *barray++ = *value++;
305             }
306           }
307           barray -=bs2;
308         }
309 
310         if (in[j] >= cstart && in[j] < cend) {
311           col  = in[j] - cstart;
312           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
313         } else if (in[j] < 0) continue;
314 #if defined(PETSC_USE_DEBUG)
315         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
316 #endif
317         else {
318           if (mat->was_assembled) {
319             if (!baij->colmap) {
320               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
321             }
322 
323 #if defined(PETSC_USE_DEBUG)
324 #if defined(PETSC_USE_CTABLE)
325             { PetscInt data;
326               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
327               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
328             }
329 #else
330             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
331 #endif
332 #endif
333 #if defined(PETSC_USE_CTABLE)
334             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
335             col  = (col - 1)/bs;
336 #else
337             col = (baij->colmap[in[j]] - 1)/bs;
338 #endif
339             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
340               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
341               col  = in[j];
342             }
343           } else col = in[j];
344           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
345         }
346       }
347     } else {
348       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
349       if (!baij->donotstash) {
350         if (roworiented) {
351           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
352         } else {
353           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
354         }
355       }
356     }
357   }
358   PetscFunctionReturn(0);
359 }
360 
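/*
   Blocked caller-side sketch (illustrative): one bs x bs block is inserted at
   block position (i,j) with i <= j; by default the block entries are read
   row-oriented.  Assumes mat is a MATMPISBAIJ with block size 2.
*/
#if 0
  PetscInt    i = 0,j = 2;                    /* block indices, i <= j */
  PetscScalar vblock[4] = {1.0,0.0,0.0,1.0};  /* 2x2 block, row-oriented */
  ierr = MatSetValuesBlocked(mat,1,&i,1,&j,vblock,INSERT_VALUES);CHKERRQ(ierr);
#endif
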
361 #undef __FUNCT__
362 #define __FUNCT__ "MatGetValues_MPISBAIJ"
363 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
364 {
365   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
366   PetscErrorCode ierr;
367   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
368   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
369 
370   PetscFunctionBegin;
371   for (i=0; i<m; i++) {
372     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
373     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
374     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
375       row = idxm[i] - bsrstart;
376       for (j=0; j<n; j++) {
377         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
378         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
379         if (idxn[j] >= bscstart && idxn[j] < bscend) {
380           col  = idxn[j] - bscstart;
381           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
382         } else {
383           if (!baij->colmap) {
384             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
385           }
386 #if defined(PETSC_USE_CTABLE)
387           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
388           data--;
389 #else
390           data = baij->colmap[idxn[j]/bs]-1;
391 #endif
392           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
393           else {
394             col  = data + idxn[j]%bs;
395             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
396           }
397         }
398       }
399     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
400   }
401   PetscFunctionReturn(0);
402 }
403 
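/*
   Usage sketch (illustrative): MatGetValues() on this type serves only locally
   owned rows, as the SETERRQ above enforces.  Querying a diagonal entry of the
   first owned row is always safe with respect to the upper-triangular storage.
*/
#if 0
  PetscInt    rstart,rend,row;
  PetscScalar val;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  row  = rstart;
  ierr = MatGetValues(mat,1,&row,1,&row,&val);CHKERRQ(ierr);
#endif
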
404 #undef __FUNCT__
405 #define __FUNCT__ "MatNorm_MPISBAIJ"
406 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
407 {
408   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscReal      sum[2],*lnorm2;
411 
412   PetscFunctionBegin;
413   if (baij->size == 1) {
414     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
415   } else {
416     if (type == NORM_FROBENIUS) {
417       ierr    = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
418       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
419       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
420       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
421       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
422       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
423       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
424       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
425     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
426       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
427       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
428       PetscReal    *rsum,*rsum2,vabs;
429       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
430       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
431       MatScalar    *v;
432 
433       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
434       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
435       /* Amat */
436       v = amat->a; jj = amat->j;
437       for (brow=0; brow<mbs; brow++) {
438         grow = bs*(rstart + brow);
439         nz   = amat->i[brow+1] - amat->i[brow];
440         for (bcol=0; bcol<nz; bcol++) {
441           gcol = bs*(rstart + *jj); jj++;
442           for (col=0; col<bs; col++) {
443             for (row=0; row<bs; row++) {
444               vabs            = PetscAbsScalar(*v); v++;
445               rsum[gcol+col] += vabs;
446               /* non-diagonal block */
447               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
448             }
449           }
450         }
451       }
452       /* Bmat */
453       v = bmat->a; jj = bmat->j;
454       for (brow=0; brow<mbs; brow++) {
455         grow = bs*(rstart + brow);
456         nz = bmat->i[brow+1] - bmat->i[brow];
457         for (bcol=0; bcol<nz; bcol++) {
458           gcol = bs*garray[*jj]; jj++;
459           for (col=0; col<bs; col++) {
460             for (row=0; row<bs; row++) {
461               vabs            = PetscAbsScalar(*v); v++;
462               rsum[gcol+col] += vabs;
463               rsum[grow+row] += vabs;
464             }
465           }
466         }
467       }
468       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
469       *norm = 0.0;
470       for (col=0; col<mat->cmap->N; col++) {
471         if (rsum2[col] > *norm) *norm = rsum2[col];
472       }
473       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
474     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
475   }
476   PetscFunctionReturn(0);
477 }
478 
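/*
   The Frobenius branch above combines the pieces as
     norm = sqrt( sum_ranks ||A_local||_F^2 + 2 * sum_ranks ||B_local||_F^2 )
   since each off-diagonal B block is stored once but appears twice in the
   full symmetric matrix.  A stack-allocated sketch of the same combination
   (illustrative; comm stands for the matrix communicator):
*/
#if 0
  PetscReal lnorm2[2],sum[2],norm;
  ierr = MatNorm(baij->A,NORM_FROBENIUS,&lnorm2[0]);CHKERRQ(ierr); lnorm2[0] *= lnorm2[0];
  ierr = MatNorm(baij->B,NORM_FROBENIUS,&lnorm2[1]);CHKERRQ(ierr); lnorm2[1] *= lnorm2[1];
  ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,comm);CHKERRQ(ierr);
  norm = PetscSqrtReal(sum[0] + 2.0*sum[1]);
#endif
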
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
481 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
484   PetscErrorCode ierr;
485   PetscInt       nstash,reallocs;
486   InsertMode     addv;
487 
488   PetscFunctionBegin;
489   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
490 
491   /* make sure all processors are either in INSERTMODE or ADDMODE */
492   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
493   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
494   mat->insertmode = addv; /* in case this processor had no cache */
495 
496   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
497   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
498   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
499   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
500   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
501   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
502   PetscFunctionReturn(0);
503 }
504 
505 #undef __FUNCT__
506 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
507 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
508 {
509   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
510   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
511   PetscErrorCode ierr;
512   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
513   PetscInt       *row,*col;
514   PetscBool      other_disassembled;
515   PetscMPIInt    n;
516   PetscBool      r1,r2,r3;
517   MatScalar      *val;
518   InsertMode     addv = mat->insertmode;
519 
520   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
521   PetscFunctionBegin;
522   if (!baij->donotstash &&  !mat->nooffprocentries) {
523     while (1) {
524       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
525       if (!flg) break;
526 
527       for (i=0; i<n;) {
528         /* Now identify the consecutive vals belonging to the same row */
529         for (j=i,rstart=row[j]; j<n; j++) {
530           if (row[j] != rstart) break;
531         }
532         if (j < n) ncols = j-i;
533         else       ncols = n-i;
534         /* Now assemble all these values with a single function call */
535         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
536         i    = j;
537       }
538     }
539     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
540     /* Now process the block-stash. Since the values are stashed column-oriented,
541        clear the roworiented flags, and restore the original flags after the
542        MatSetValuesBlocked() calls */
543     r1 = baij->roworiented;
544     r2 = a->roworiented;
545     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
546 
547     baij->roworiented = PETSC_FALSE;
548     a->roworiented    = PETSC_FALSE;
549 
550     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
551     while (1) {
552       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
553       if (!flg) break;
554 
555       for (i=0; i<n;) {
556         /* Now identify the consecutive vals belonging to the same row */
557         for (j=i,rstart=row[j]; j<n; j++) {
558           if (row[j] != rstart) break;
559         }
560         if (j < n) ncols = j-i;
561         else       ncols = n-i;
562         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
563         i    = j;
564       }
565     }
566     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
567 
568     baij->roworiented = r1;
569     a->roworiented    = r2;
570 
571     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
572   }
573 
574   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
575   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
576 
577   /* determine if any processor has disassembled; if so, we must
578      also disassemble ourselves so that we may reassemble. */
579   /*
580      if nonzero structure of submatrix B cannot change then we know that
581      no processor disassembled thus we can skip this stuff
582   */
583   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
584     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
585     if (mat->was_assembled && !other_disassembled) {
586       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
587     }
588   }
589 
590   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
591     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
592   }
593   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
594   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
595   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
596 
597   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
598 
599   baij->rowvalues = 0;
600   PetscFunctionReturn(0);
601 }
602 
603 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
604 #undef __FUNCT__
605 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
606 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
607 {
608   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
609   PetscErrorCode    ierr;
610   PetscInt          bs   = mat->rmap->bs;
611   PetscMPIInt       size = baij->size,rank = baij->rank;
612   PetscBool         iascii,isdraw;
613   PetscViewer       sviewer;
614   PetscViewerFormat format;
615 
616   PetscFunctionBegin;
617   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
618   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
619   if (iascii) {
620     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
621     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
622       MatInfo info;
623       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
624       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
625       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
626       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
627       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
629       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
630       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
631       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
632       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
633       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
634       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
635       PetscFunctionReturn(0);
636     } else if (format == PETSC_VIEWER_ASCII_INFO) {
637       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
638       PetscFunctionReturn(0);
639     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
640       PetscFunctionReturn(0);
641     }
642   }
643 
644   if (isdraw) {
645     PetscDraw draw;
646     PetscBool isnull;
647     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
648     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
649   }
650 
651   if (size == 1) {
652     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
653     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
654   } else {
655     /* assemble the entire matrix onto first processor. */
656     Mat          A;
657     Mat_SeqSBAIJ *Aloc;
658     Mat_SeqBAIJ  *Bloc;
659     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
660     MatScalar    *a;
661 
662     /* Should this be the same type as mat? */
663     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
664     if (!rank) {
665       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
666     } else {
667       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
668     }
669     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
670     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
671     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
672     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
673 
674     /* copy over the A part */
675     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
676     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
677     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
678 
679     for (i=0; i<mbs; i++) {
680       rvals[0] = bs*(baij->rstartbs + i);
681       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
682       for (j=ai[i]; j<ai[i+1]; j++) {
683         col = (baij->cstartbs+aj[j])*bs;
684         for (k=0; k<bs; k++) {
685           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
686           col++;
687           a += bs;
688         }
689       }
690     }
691     /* copy over the B part */
692     Bloc = (Mat_SeqBAIJ*)baij->B->data;
693     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
694     for (i=0; i<mbs; i++) {
695 
696       rvals[0] = bs*(baij->rstartbs + i);
697       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
698       for (j=ai[i]; j<ai[i+1]; j++) {
699         col = baij->garray[aj[j]]*bs;
700         for (k=0; k<bs; k++) {
701           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
702           col++;
703           a += bs;
704         }
705       }
706     }
707     ierr = PetscFree(rvals);CHKERRQ(ierr);
708     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
709     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
710     /*
711        Everyone has to call to draw the matrix since the graphics waits are
712        synchronized across all processors that share the PetscDraw object
713     */
714     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
715     if (!rank) {
716       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
717       /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/
718       ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr);
719       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
720     }
721     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
722     ierr = MatDestroy(&A);CHKERRQ(ierr);
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatView_MPISBAIJ"
729 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
730 {
731   PetscErrorCode ierr;
732   PetscBool      iascii,isdraw,issocket,isbinary;
733 
734   PetscFunctionBegin;
735   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
736   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
737   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
739   if (iascii || isdraw || issocket || isbinary) {
740     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatDestroy_MPISBAIJ"
747 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
748 {
749   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753 #if defined(PETSC_USE_LOG)
754   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
755 #endif
756   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
757   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
758   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
759   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
760 #if defined(PETSC_USE_CTABLE)
761   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
762 #else
763   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
764 #endif
765   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
767   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
768   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
773   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
774   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
775   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
776   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
777   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
778   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
779   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
780 #if defined(PETSC_USE_REAL_MAT_SINGLE)
781   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
782 #endif
783   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
784   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
785   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
786   ierr = PetscFree(mat->data);CHKERRQ(ierr);
787 
788   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",NULL);CHKERRQ(ierr);
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
799 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
800 {
801   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
802   PetscErrorCode ierr;
803   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
804   PetscScalar    *x,*from;
805 
806   PetscFunctionBegin;
807   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
808   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
809 
810   /* diagonal part */
811   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
812   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
813 
814   /* subdiagonal part */
815   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
816 
817   /* copy x into the vec slvec0 */
818   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
819   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
820 
821   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
822   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
823   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
824 
825   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   /* superdiagonal part */
828   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatMult_MPISBAIJ"
834 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
835 {
836   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
837   PetscErrorCode ierr;
838   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
839   PetscScalar    *x,*from;
840 
841   PetscFunctionBegin;
842   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
843   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
844 
845   /* diagonal part */
846   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
847   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
848 
849   /* subdiagonal part */
850   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
851 
852   /* copy x into the vec slvec0 */
853   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
854   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
855 
856   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
857   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
858   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
859 
860   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   /* superdiagonal part */
863   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
866 
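/*
   From one rank's point of view the product splits as
     y = A x + B x_ghost + (contributions of B^T x scattered to owning ranks).
   slvec0 packs (x, B^T x); a single VecScatter with ADD_VALUES both gathers
   the ghost values into slvec1b and accumulates the transpose contributions
   into slvec1a, and the final multadd folds everything into y.  Caller-side
   usage is just the public interface (sketch; MatGetVecs() assumed available
   in this PETSc version):
*/
#if 0
  Vec x,y;
  ierr = MatGetVecs(mat,&x,&y);CHKERRQ(ierr);
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(mat,x,y);CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
#endif
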
867 #undef __FUNCT__
868 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
869 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
870 {
871   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
872   PetscErrorCode ierr;
873   PetscInt       nt;
874 
875   PetscFunctionBegin;
876   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
877   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
878 
879   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
880   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
881 
882   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883   /* do diagonal part */
884   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
885   /* do superdiagonal part */
886   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
888   /* do subdiagonal part */
889   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
890   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
891   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
892   PetscFunctionReturn(0);
893 }
894 
895 #undef __FUNCT__
896 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
897 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
898 {
899   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
900   PetscErrorCode ierr;
901   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
902   PetscScalar    *x,*from,zero=0.0;
903 
904   PetscFunctionBegin;
905   /*
906   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
907   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A));
908   */
909   /* diagonal part */
910   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
911   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
912 
913   /* subdiagonal part */
914   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
915 
916   /* copy x into the vec slvec0 */
917   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
918   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
919   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
920   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
921 
922   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
923   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
924   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
925 
926   /* superdiagonal part */
927   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
928   PetscFunctionReturn(0);
929 }
930 
931 #undef __FUNCT__
932 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
933 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
934 {
935   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
936   PetscErrorCode ierr;
937 
938   PetscFunctionBegin;
939   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940   /* do diagonal part */
941   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
942   /* do superdiagonal part */
943   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
944   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
945 
946   /* do subdiagonal part */
947   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
948   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
949   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
950   PetscFunctionReturn(0);
951 }
952 
953 /*
954    This only works correctly for square matrices where the subblock A->A is
955    the diagonal block
956 */
957 #undef __FUNCT__
958 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
959 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
960 {
961   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
966   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
967   PetscFunctionReturn(0);
968 }
969 
970 #undef __FUNCT__
971 #define __FUNCT__ "MatScale_MPISBAIJ"
972 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
973 {
974   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
975   PetscErrorCode ierr;
976 
977   PetscFunctionBegin;
978   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
979   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatGetRow_MPISBAIJ"
985 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
986 {
987   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
988   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
989   PetscErrorCode ierr;
990   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
991   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
992   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
993 
994   PetscFunctionBegin;
995   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
996   mat->getrowactive = PETSC_TRUE;
997 
998   if (!mat->rowvalues && (idx || v)) {
999     /*
1000         allocate enough space to hold information from the longest row.
1001     */
1002     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1003     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1004     PetscInt     max = 1,mbs = mat->mbs,tmp;
1005     for (i=0; i<mbs; i++) {
1006       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1007       if (max < tmp) max = tmp;
1008     }
1009     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1010   }
1011 
1012   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1013   lrow = row - brstart;  /* local row index */
1014 
1015   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1016   if (!v)   {pvA = 0; pvB = 0;}
1017   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1018   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1019   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1020   nztot = nzA + nzB;
1021 
1022   cmap = mat->garray;
1023   if (v  || idx) {
1024     if (nztot) {
1025       /* Sort by increasing column numbers, assuming A and B already sorted */
1026       PetscInt imark = -1;
1027       if (v) {
1028         *v = v_p = mat->rowvalues;
1029         for (i=0; i<nzB; i++) {
1030           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1031           else break;
1032         }
1033         imark = i;
1034         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1035         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1036       }
1037       if (idx) {
1038         *idx = idx_p = mat->rowindices;
1039         if (imark > -1) {
1040           for (i=0; i<imark; i++) {
1041             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1042           }
1043         } else {
1044           for (i=0; i<nzB; i++) {
1045             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1046             else break;
1047           }
1048           imark = i;
1049         }
1050         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1051         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1052       }
1053     } else {
1054       if (idx) *idx = 0;
1055       if (v)   *v   = 0;
1056     }
1057   }
1058   *nz  = nztot;
1059   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1060   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
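/*
   Usage sketch (illustrative): row access on SBAIJ must be bracketed by the
   upper-triangular access calls defined below; row is assumed to be a locally
   owned global row index.
*/
#if 0
  PetscInt          ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;
  ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
  ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  /* ... use cols[] and vals[] ... */
  ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
#endif
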
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1066 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1067 {
1068   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1069 
1070   PetscFunctionBegin;
1071   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1072   baij->getrowactive = PETSC_FALSE;
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1078 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1079 {
1080   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1081   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1082 
1083   PetscFunctionBegin;
1084   aA->getrow_utriangular = PETSC_TRUE;
1085   PetscFunctionReturn(0);
1086 }
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1089 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1090 {
1091   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1092   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1093 
1094   PetscFunctionBegin;
1095   aA->getrow_utriangular = PETSC_FALSE;
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1101 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1102 {
1103   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1108   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 #undef __FUNCT__
1113 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1114 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1115 {
1116   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1117   PetscErrorCode ierr;
1118 
1119   PetscFunctionBegin;
1120   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1121   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1127 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1128 {
1129   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1130   PetscErrorCode ierr;
1131 
1132   PetscFunctionBegin;
1133   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1134   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNCT__
1139 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1140 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1141 {
1142   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1143   Mat            A  = a->A,B = a->B;
1144   PetscErrorCode ierr;
1145   PetscReal      isend[5],irecv[5];
1146 
1147   PetscFunctionBegin;
1148   info->block_size = (PetscReal)matin->rmap->bs;
1149 
1150   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1151 
1152   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1153   isend[3] = info->memory;  isend[4] = info->mallocs;
1154 
1155   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1156 
1157   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1158   isend[3] += info->memory;  isend[4] += info->mallocs;
1159   if (flag == MAT_LOCAL) {
1160     info->nz_used      = isend[0];
1161     info->nz_allocated = isend[1];
1162     info->nz_unneeded  = isend[2];
1163     info->memory       = isend[3];
1164     info->mallocs      = isend[4];
1165   } else if (flag == MAT_GLOBAL_MAX) {
1166     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1167 
1168     info->nz_used      = irecv[0];
1169     info->nz_allocated = irecv[1];
1170     info->nz_unneeded  = irecv[2];
1171     info->memory       = irecv[3];
1172     info->mallocs      = irecv[4];
1173   } else if (flag == MAT_GLOBAL_SUM) {
1174     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1175 
1176     info->nz_used      = irecv[0];
1177     info->nz_allocated = irecv[1];
1178     info->nz_unneeded  = irecv[2];
1179     info->memory       = irecv[3];
1180     info->mallocs      = irecv[4];
1181   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1182   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1183   info->fill_ratio_needed = 0;
1184   info->factor_mallocs    = 0;
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1190 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1191 {
1192   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1193   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197   switch (op) {
1198   case MAT_NEW_NONZERO_LOCATIONS:
1199   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1200   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1201   case MAT_KEEP_NONZERO_PATTERN:
1202   case MAT_NEW_NONZERO_LOCATION_ERR:
1203     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1204     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1205     break;
1206   case MAT_ROW_ORIENTED:
1207     a->roworiented = flg;
1208 
1209     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1210     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1211     break;
1212   case MAT_NEW_DIAGONALS:
1213     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1214     break;
1215   case MAT_IGNORE_OFF_PROC_ENTRIES:
1216     a->donotstash = flg;
1217     break;
1218   case MAT_USE_HASH_TABLE:
1219     a->ht_flag = flg;
1220     break;
1221   case MAT_HERMITIAN:
1222     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1223     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1224 
1225     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1226     break;
1227   case MAT_SPD:
1228     A->spd_set = PETSC_TRUE;
1229     A->spd     = flg;
1230     if (flg) {
1231       A->symmetric                  = PETSC_TRUE;
1232       A->structurally_symmetric     = PETSC_TRUE;
1233       A->symmetric_set              = PETSC_TRUE;
1234       A->structurally_symmetric_set = PETSC_TRUE;
1235     }
1236     break;
1237   case MAT_SYMMETRIC:
1238     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1239     break;
1240   case MAT_STRUCTURALLY_SYMMETRIC:
1241     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1242     break;
1243   case MAT_SYMMETRY_ETERNAL:
1244     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1245     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1246     break;
1247   case MAT_IGNORE_LOWER_TRIANGULAR:
1248     aA->ignore_ltriangular = flg;
1249     break;
1250   case MAT_ERROR_LOWER_TRIANGULAR:
1251     aA->ignore_ltriangular = flg;
1252     break;
1253   case MAT_GETROW_UPPERTRIANGULAR:
1254     aA->getrow_utriangular = flg;
1255     break;
1256   default:
1257     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
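/*
   Usage sketch (illustrative): per the MAT_SPD case above, marking a matrix
   SPD also flags it as symmetric and structurally symmetric.
*/
#if 0
  ierr = MatSetOption(mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
#endif
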
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1264 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1265 {
1266   PetscErrorCode ierr;
1267 
1268   PetscFunctionBegin;
1269   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1270     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1271   }
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1277 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1278 {
1279   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1280   Mat            a     = baij->A, b=baij->B;
1281   PetscErrorCode ierr;
1282   PetscInt       nv,m,n;
1283   PetscBool      flg;
1284 
1285   PetscFunctionBegin;
1286   if (ll != rr) {
1287     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1288     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1289   }
1290   if (!ll) PetscFunctionReturn(0);
1291 
1292   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1293   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1294 
1295   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1296   if (nv != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local size");
1297 
1298   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1299 
1300   /* left diagonalscale the off-diagonal part */
1301   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1302 
1303   /* scale the diagonal part */
1304   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1305 
1306   /* right diagonalscale the off-diagonal part */
1307   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1308   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
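/*
   Usage sketch (illustrative): only the symmetric scaling M <- D M D is
   supported, so the same vector must be passed on both sides.
*/
#if 0
  Vec d;
  ierr = MatGetVecs(mat,&d,NULL);CHKERRQ(ierr);
  ierr = VecSet(d,2.0);CHKERRQ(ierr);
  ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);
  ierr = VecDestroy(&d);CHKERRQ(ierr);
#endif
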
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1314 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1315 {
1316   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1317   PetscErrorCode ierr;
1318 
1319   PetscFunctionBegin;
1320   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatEqual_MPISBAIJ"
1328 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1329 {
1330   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1331   Mat            a,b,c,d;
1332   PetscBool      flg;
1333   PetscErrorCode ierr;
1334 
1335   PetscFunctionBegin;
1336   a = matA->A; b = matA->B;
1337   c = matB->A; d = matB->B;
1338 
1339   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1340   if (flg) {
1341     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1342   }
1343   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatCopy_MPISBAIJ"
1349 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1350 {
1351   PetscErrorCode ierr;
1352   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1353   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1354 
1355   PetscFunctionBegin;
1356   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1357   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1358     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1359     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1360     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1361   } else {
1362     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1363     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1370 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1371 {
1372   PetscErrorCode ierr;
1373 
1374   PetscFunctionBegin;
1375   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1381 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1382 {
1383   PetscErrorCode ierr;
1384   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1385   PetscBLASInt   bnz,one=1;
1386   Mat_SeqSBAIJ   *xa,*ya;
1387   Mat_SeqBAIJ    *xb,*yb;
1388 
1389   PetscFunctionBegin;
1390   if (str == SAME_NONZERO_PATTERN) {
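    /* identical nonzero patterns: Y += a*X reduces to a BLAS axpy on the stored values of the diagonal (A) and off-diagonal (B) blocks */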
1391     PetscScalar alpha = a;
1392     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1393     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1394     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1395     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1396     xb   = (Mat_SeqBAIJ*)xx->B->data;
1397     yb   = (Mat_SeqBAIJ*)yy->B->data;
1398     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1399     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1400   } else {
1401     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1402     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1403     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1410 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1411 {
1412   PetscErrorCode ierr;
1413   PetscInt       i;
1414   PetscBool      flg;
1415 
1416   PetscFunctionBegin;
1417   for (i=0; i<n; i++) {
1418     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1419     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1420   }
1421   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 
1426 /* -------------------------------------------------------------------*/
1427 static struct _MatOps MatOps_Values = {
1428         MatSetValues_MPISBAIJ,
1429         MatGetRow_MPISBAIJ,
1430         MatRestoreRow_MPISBAIJ,
1431         MatMult_MPISBAIJ,
1432 /*  4*/ MatMultAdd_MPISBAIJ,
1433         MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1434         MatMultAdd_MPISBAIJ,
1435         0,
1436         0,
1437         0,
1438 /* 10*/ 0,
1439         0,
1440         0,
1441         MatSOR_MPISBAIJ,
1442         MatTranspose_MPISBAIJ,
1443 /* 15*/ MatGetInfo_MPISBAIJ,
1444         MatEqual_MPISBAIJ,
1445         MatGetDiagonal_MPISBAIJ,
1446         MatDiagonalScale_MPISBAIJ,
1447         MatNorm_MPISBAIJ,
1448 /* 20*/ MatAssemblyBegin_MPISBAIJ,
1449         MatAssemblyEnd_MPISBAIJ,
1450         MatSetOption_MPISBAIJ,
1451         MatZeroEntries_MPISBAIJ,
1452 /* 24*/ 0,
1453         0,
1454         0,
1455         0,
1456         0,
1457 /* 29*/ MatSetUp_MPISBAIJ,
1458         0,
1459         0,
1460         0,
1461         0,
1462 /* 34*/ MatDuplicate_MPISBAIJ,
1463         0,
1464         0,
1465         0,
1466         0,
1467 /* 39*/ MatAXPY_MPISBAIJ,
1468         MatGetSubMatrices_MPISBAIJ,
1469         MatIncreaseOverlap_MPISBAIJ,
1470         MatGetValues_MPISBAIJ,
1471         MatCopy_MPISBAIJ,
1472 /* 44*/ 0,
1473         MatScale_MPISBAIJ,
1474         0,
1475         0,
1476         0,
1477 /* 49*/ 0,
1478         0,
1479         0,
1480         0,
1481         0,
1482 /* 54*/ 0,
1483         0,
1484         MatSetUnfactored_MPISBAIJ,
1485         0,
1486         MatSetValuesBlocked_MPISBAIJ,
1487 /* 59*/ 0,
1488         0,
1489         0,
1490         0,
1491         0,
1492 /* 64*/ 0,
1493         0,
1494         0,
1495         0,
1496         0,
1497 /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1498         0,
1499         0,
1500         0,
1501         0,
1502 /* 74*/ 0,
1503         0,
1504         0,
1505         0,
1506         0,
1507 /* 79*/ 0,
1508         0,
1509         0,
1510         0,
1511         MatLoad_MPISBAIJ,
1512 /* 84*/ 0,
1513         0,
1514         0,
1515         0,
1516         0,
1517 /* 89*/ 0,
1518         0,
1519         0,
1520         0,
1521         0,
1522 /* 94*/ 0,
1523         0,
1524         0,
1525         0,
1526         0,
1527 /* 99*/ 0,
1528         0,
1529         0,
1530         0,
1531         0,
1532 /*104*/ 0,
1533         MatRealPart_MPISBAIJ,
1534         MatImaginaryPart_MPISBAIJ,
1535         MatGetRowUpperTriangular_MPISBAIJ,
1536         MatRestoreRowUpperTriangular_MPISBAIJ,
1537 /*109*/ 0,
1538         0,
1539         0,
1540         0,
1541         0,
1542 /*114*/ 0,
1543         0,
1544         0,
1545         0,
1546         0,
1547 /*119*/ 0,
1548         0,
1549         0,
1550         0
1551 };
1552 
1553 
1554 EXTERN_C_BEGIN
1555 #undef __FUNCT__
1556 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1557 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1558 {
1559   PetscFunctionBegin;
1560   *a = ((Mat_MPISBAIJ*)A->data)->A;
1561   PetscFunctionReturn(0);
1562 }
1563 EXTERN_C_END
1564 
1565 EXTERN_C_BEGIN
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1568 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1569 {
1570   Mat_MPISBAIJ   *b;
1571   PetscErrorCode ierr;
1572   PetscInt       i,mbs,Mbs;
1573 
1574   PetscFunctionBegin;
1575   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1576   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1577   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1578   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1579 
1580   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1581   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1582   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1583   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1584   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1585 
1586   if (d_nnz) {
1587     for (i=0; i<B->rmap->n/bs; i++) {
1588       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1589     }
1590   }
1591   if (o_nnz) {
1592     for (i=0; i<B->rmap->n/bs; i++) {
1593       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1594     }
1595   }
1596 
1597   b   = (Mat_MPISBAIJ*)B->data;
1598   mbs = B->rmap->n/bs;
1599   Mbs = B->rmap->N/bs;
1600   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1601 
1602   B->rmap->bs = bs;
1603   b->bs2      = bs*bs;
1604   b->mbs      = mbs;
1605   b->nbs      = mbs;
1606   b->Mbs      = Mbs;
1607   b->Nbs      = Mbs;
1608 
1609   for (i=0; i<=b->size; i++) {
1610     b->rangebs[i] = B->rmap->range[i]/bs;
1611   }
1612   b->rstartbs = B->rmap->rstart/bs;
1613   b->rendbs   = B->rmap->rend/bs;
1614 
1615   b->cstartbs = B->cmap->rstart/bs;
1616   b->cendbs   = B->cmap->rend/bs;
1617 
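  /* create the sequential blocks: b->A holds the (upper triangular) "diagonal" block in SBAIJ format, b->B the off-diagonal block in BAIJ format */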
1618   if (!B->preallocated) {
1619     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1620     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1621     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1622     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1623     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1624     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1625     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1626     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1627     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1628   }
1629 
1630   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1631   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1632 
1633   B->preallocated = PETSC_TRUE;
1634   PetscFunctionReturn(0);
1635 }
1636 EXTERN_C_END
1637 
1638 EXTERN_C_BEGIN
1639 #undef __FUNCT__
1640 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1641 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1642 {
1643   PetscInt       m,rstart,cstart,cend;
1644   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1645   const PetscInt *JJ    =0;
1646   PetscScalar    *values=0;
1647   PetscErrorCode ierr;
1648 
1649   PetscFunctionBegin;
1650   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1651   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1652   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1653   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1654   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1655   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1656   m      = B->rmap->n/bs;
1657   rstart = B->rmap->rstart/bs;
1658   cstart = B->cmap->rstart/bs;
1659   cend   = B->cmap->rend/bs;
1660 
1661   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1662   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
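  /* for each block row, count the block columns inside the diagonal block [cstart,cend) (d_nnz) and outside it (o_nnz) */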
1663   for (i=0; i<m; i++) {
1664     nz = ii[i+1] - ii[i];
1665     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1666     nz_max = PetscMax(nz_max,nz);
1667     JJ     = jj + ii[i];
1668     for (j=0; j<nz; j++) {
1669       if (*JJ >= cstart) break;
1670       JJ++;
1671     }
1672     d = 0;
1673     for (; j<nz; j++) {
1674       if (*JJ++ >= cend) break;
1675       d++;
1676     }
1677     d_nnz[i] = d;
1678     o_nnz[i] = nz - d;
1679   }
1680   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1681   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1682 
1683   values = (PetscScalar*)V;
1684   if (!values) {
1685     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1686     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1687   }
1688   for (i=0; i<m; i++) {
1689     PetscInt          row    = i + rstart;
1690     PetscInt          ncols  = ii[i+1] - ii[i];
1691     const PetscInt    *icols = jj + ii[i];
1692     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1693     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1694   }
1695 
1696   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1697   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1698   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1699   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1700   PetscFunctionReturn(0);
1701 }
1702 EXTERN_C_END
1703 
1704 EXTERN_C_BEGIN
1705 #if defined(PETSC_HAVE_MUMPS)
1706 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1707 #endif
1708 #if defined(PETSC_HAVE_PASTIX)
1709 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1710 #endif
1711 EXTERN_C_END
1712 
1713 /*MC
1714    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1715    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1716    the matrix is stored.
1717 
1718   For complex numbers this matrix is by default symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1719   can call MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE).
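
  For instance (a minimal sketch, assuming A is an already assembled MATMPISBAIJ matrix and x, y are conforming vectors):
.vb
   ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMult(A,x,y);CHKERRQ(ierr);   /* now applies the Hermitian product */
.ve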
1720 
1721    Options Database Keys:
1722 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1723 
1724   Level: beginner
1725 
1726 .seealso: MatCreateMPISBAIJ
1727 M*/
1728 
1729 EXTERN_C_BEGIN
1730 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1731 EXTERN_C_END
1732 
1733 EXTERN_C_BEGIN
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatCreate_MPISBAIJ"
1736 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1737 {
1738   Mat_MPISBAIJ   *b;
1739   PetscErrorCode ierr;
1740   PetscBool      flg;
1741 
1742   PetscFunctionBegin;
1743   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1744   B->data = (void*)b;
1745   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1746 
1747   B->ops->destroy = MatDestroy_MPISBAIJ;
1748   B->ops->view    = MatView_MPISBAIJ;
1749   B->assembled    = PETSC_FALSE;
1750   B->insertmode   = NOT_SET_VALUES;
1751 
1752   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1753   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1754 
1755   /* build local table of row and column ownerships */
1756   ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1757 
1758   /* build cache for off array entries formed */
1759   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1760 
1761   b->donotstash  = PETSC_FALSE;
1762   b->colmap      = NULL;
1763   b->garray      = NULL;
1764   b->roworiented = PETSC_TRUE;
1765 
1766   /* stuff used in block assembly */
1767   b->barray = 0;
1768 
1769   /* stuff used for matrix vector multiply */
1770   b->lvec    = 0;
1771   b->Mvctx   = 0;
1772   b->slvec0  = 0;
1773   b->slvec0b = 0;
1774   b->slvec1  = 0;
1775   b->slvec1a = 0;
1776   b->slvec1b = 0;
1777   b->sMvctx  = 0;
1778 
1779   /* stuff for MatGetRow() */
1780   b->rowindices   = 0;
1781   b->rowvalues    = 0;
1782   b->getrowactive = PETSC_FALSE;
1783 
1784   /* hash table stuff */
1785   b->ht           = 0;
1786   b->hd           = 0;
1787   b->ht_size      = 0;
1788   b->ht_flag      = PETSC_FALSE;
1789   b->ht_fact      = 0;
1790   b->ht_total_ct  = 0;
1791   b->ht_insert_ct = 0;
1792 
1793   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1794   b->ijonly = PETSC_FALSE;
1795 
1796   b->in_loc = 0;
1797   b->v_loc  = 0;
1798   b->n_loc  = 0;
1799   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1800   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
1801   if (flg) {
1802     PetscReal fact = 1.39;
1803     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1804     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1805     if (fact <= 1.0) fact = 1.39;
1806     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1807     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1808   }
1809   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1810 
1811 #if defined(PETSC_HAVE_PASTIX)
1812   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1813                                            "MatGetFactor_mpisbaij_pastix",
1814                                            MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1815 #endif
1816 #if defined(PETSC_HAVE_MUMPS)
1817   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1818                                            "MatGetFactor_sbaij_mumps",
1819                                            MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1820 #endif
1821   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1822                                            "MatStoreValues_MPISBAIJ",
1823                                            MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1824   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1825                                            "MatRetrieveValues_MPISBAIJ",
1826                                            MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1827   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1828                                            "MatGetDiagonalBlock_MPISBAIJ",
1829                                            MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1830   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1831                                            "MatMPISBAIJSetPreallocation_MPISBAIJ",
1832                                            MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1833   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1834                                            "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1835                                            MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1836   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",
1837                                            "MatConvert_MPISBAIJ_MPISBSTRM",
1838                                            MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1839 
1840   B->symmetric                  = PETSC_TRUE;
1841   B->structurally_symmetric     = PETSC_TRUE;
1842   B->symmetric_set              = PETSC_TRUE;
1843   B->structurally_symmetric_set = PETSC_TRUE;
1844 
1845   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1846   PetscFunctionReturn(0);
1847 }
1848 EXTERN_C_END
1849 
1850 /*MC
1851    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1852 
1853    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1854    and MATMPISBAIJ otherwise.
1855 
1856    Options Database Keys:
1857 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1858 
1859   Level: beginner
1860 
1861 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1862 M*/
1863 
1864 #undef __FUNCT__
1865 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1866 /*@C
1867    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1868    the user should preallocate the matrix storage by setting the parameters
1869    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1870    performance can be increased by more than a factor of 50.
1871 
1872    Collective on Mat
1873 
1874    Input Parameters:
1875 +  A - the matrix
1876 .  bs   - size of block
1877 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1878            submatrix  (same for all local rows)
1879 .  d_nnz - array containing the number of block nonzeros in the various block rows
1880            in the upper triangular and diagonal part of the diagonal portion of the local
1881            submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1882            for the diagonal entry and set a value even if it is zero.
1883 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1884            submatrix (same for all local rows).
1885 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1886            off-diagonal portion of the local submatrix that is right of the diagonal
1887            (possibly different for each block row) or NULL.
1888 
1889 
1890    Options Database Keys:
1891 +   -mat_no_unroll - uses code that does not unroll the loops in the
1892                      block calculations (much slower)
1893 -   -mat_block_size - size of the blocks to use
1894 
1895    Notes:
1896 
1897    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1898    then it must be used on all processors that share the object for that argument.
1899 
1900    If the *_nnz parameter is given then the *_nz parameter is ignored
1901 
1902    Storage Information:
1903    For a square global matrix we define each processor's diagonal portion
1904    to be its local rows and the corresponding columns (a square submatrix);
1905    each processor's off-diagonal portion encompasses the remainder of the
1906    local matrix (a rectangular submatrix).
1907 
1908    The user can specify preallocated storage for the diagonal part of
1909    the local submatrix with either d_nz or d_nnz (not both).  Set
1910    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1911    memory allocation.  Likewise, specify preallocated storage for the
1912    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1913 
1914    You can call MatGetInfo() to get information on how effective the preallocation was,
1915    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1916    You can also run with the option -info and look for messages with the string
1917    malloc in them to see if additional memory allocation was needed.
1918 
1919    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1920    the figure below we depict these three local rows and all columns (0-11).
1921 
1922 .vb
1923            0 1 2 3 4 5 6 7 8 9 10 11
1924           -------------------
1925    row 3  |  . . . d d d o o o o o o
1926    row 4  |  . . . d d d o o o o o o
1927    row 5  |  . . . d d d o o o o o o
1928           -------------------
1929 .ve
1930 
1931    Thus, any entries in the d locations are stored in the d (diagonal)
1932    submatrix, and any entries in the o locations are stored in the
1933    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1934    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1935 
1936    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1937    plus the diagonal part of the d matrix,
1938    and o_nz should indicate the number of block nonzeros per row in the o matrix.
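
   For the rows depicted above with bs=1, counting only the stored (upper triangular
   and off-diagonal) entries gives d_nnz = {3,2,1} and o_nnz = {6,6,6}, so one could
   preallocate with (a minimal sketch, assuming A is a MATMPISBAIJ matrix whose sizes
   have been set):

.vb
   PetscInt d_nnz[] = {3,2,1},o_nnz[] = {6,6,6};
   ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve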
1939 
1940    In general, for PDE problems in which most nonzeros are near the diagonal,
1941    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1942    or you will get TERRIBLE performance; see the users' manual chapter on
1943    matrices.
1944 
1945    Level: intermediate
1946 
1947 .keywords: matrix, block, aij, compressed row, sparse, parallel
1948 
1949 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1950 @*/
1951 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1952 {
1953   PetscErrorCode ierr;
1954 
1955   PetscFunctionBegin;
1956   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1957   PetscValidType(B,1);
1958   PetscValidLogicalCollectiveInt(B,bs,2);
1959   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatCreateSBAIJ"
1965 /*@C
1966    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1967    (block compressed row).  For good matrix assembly performance
1968    the user should preallocate the matrix storage by setting the parameters
1969    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1970    performance can be increased by more than a factor of 50.
1971 
1972    Collective on MPI_Comm
1973 
1974    Input Parameters:
1975 +  comm - MPI communicator
1976 .  bs   - size of block
1977 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1978            This value should be the same as the local size used in creating the
1979            y vector for the matrix-vector product y = Ax.
1980 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1981            This value should be the same as the local size used in creating the
1982            x vector for the matrix-vector product y = Ax.
1983 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1984 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1985 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1986            submatrix  (same for all local rows)
1987 .  d_nnz - array containing the number of block nonzeros in the various block rows
1988            in the upper triangular portion of the diagonal portion of the local
1989            submatrix (possibly different for each block row) or NULL.
1990            If you plan to factor the matrix you must leave room for the diagonal entry and
1991            set its value even if it is zero.
1992 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1993            submatrix (same for all local rows).
1994 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1995            off-diagonal portion of the local submatrix (possibly different for
1996            each block row) or NULL.
1997 
1998    Output Parameter:
1999 .  A - the matrix
2000 
2001    Options Database Keys:
2002 +   -mat_no_unroll - uses code that does not unroll the loops in the
2003                      block calculations (much slower)
2004 .   -mat_block_size - size of the blocks to use
2005 -   -mat_mpi - use the parallel matrix data structures even on one processor
2006                (defaults to using SeqBAIJ format on one processor)
2007 
2008    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2009    MatXXXXSetPreallocation() paradigm instead of this routine directly, as sketched below.
2010    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
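
   A minimal sketch of that paradigm (comm, the sizes, and the preallocation parameters as described above):

.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
   ierr = MatSeqSBAIJSetPreallocation(A,bs,d_nz,d_nnz);CHKERRQ(ierr);  /* no-op when A is parallel */
.ve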
2011 
2012    Notes:
2013    The number of rows and columns must be divisible by the blocksize.
2014    This matrix type does not support complex Hermitian operations.
2015 
2016    The user MUST specify either the local or global matrix dimensions
2017    (possibly both).
2018 
2019    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2020    then it must be used on all processors that share the object for that argument.
2021 
2022    If the *_nnz parameter is given then the *_nz parameter is ignored
2023 
2024    Storage Information:
2025    For a square global matrix we define each processor's diagonal portion
2026    to be its local rows and the corresponding columns (a square submatrix);
2027    each processor's off-diagonal portion encompasses the remainder of the
2028    local matrix (a rectangular submatrix).
2029 
2030    The user can specify preallocated storage for the diagonal part of
2031    the local submatrix with either d_nz or d_nnz (not both).  Set
2032    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2033    memory allocation.  Likewise, specify preallocated storage for the
2034    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2035 
2036    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2037    the figure below we depict these three local rows and all columns (0-11).
2038 
2039 .vb
2040            0 1 2 3 4 5 6 7 8 9 10 11
2041           -------------------
2042    row 3  |  . . . d d d o o o o o o
2043    row 4  |  . . . d d d o o o o o o
2044    row 5  |  . . . d d d o o o o o o
2045           -------------------
2046 .ve
2047 
2048    Thus, any entries in the d locations are stored in the d (diagonal)
2049    submatrix, and any entries in the o locations are stored in the
2050    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2051    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2052 
2053    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2054    plus the diagonal part of the d matrix,
2055    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2056    In general, for PDE problems in which most nonzeros are near the diagonal,
2057    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2058    or you will get TERRIBLE performance; see the users' manual chapter on
2059    matrices.
2060 
2061    Level: intermediate
2062 
2063 .keywords: matrix, block, aij, compressed row, sparse, parallel
2064 
2065 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2066 @*/
2067 
2068 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2069 {
2070   PetscErrorCode ierr;
2071   PetscMPIInt    size;
2072 
2073   PetscFunctionBegin;
2074   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2075   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2076   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2077   if (size > 1) {
2078     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2079     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2080   } else {
2081     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2082     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2090 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2091 {
2092   Mat            mat;
2093   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2094   PetscErrorCode ierr;
2095   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2096   PetscScalar    *array;
2097 
2098   PetscFunctionBegin;
2099   *newmat = 0;
2100 
2101   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2102   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2103   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2104   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2105   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2106   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2107 
2108   mat->factortype   = matin->factortype;
2109   mat->preallocated = PETSC_TRUE;
2110   mat->assembled    = PETSC_TRUE;
2111   mat->insertmode   = NOT_SET_VALUES;
2112 
2113   a      = (Mat_MPISBAIJ*)mat->data;
2114   a->bs2 = oldmat->bs2;
2115   a->mbs = oldmat->mbs;
2116   a->nbs = oldmat->nbs;
2117   a->Mbs = oldmat->Mbs;
2118   a->Nbs = oldmat->Nbs;
2119 
2120 
2121   a->size         = oldmat->size;
2122   a->rank         = oldmat->rank;
2123   a->donotstash   = oldmat->donotstash;
2124   a->roworiented  = oldmat->roworiented;
2125   a->rowindices   = 0;
2126   a->rowvalues    = 0;
2127   a->getrowactive = PETSC_FALSE;
2128   a->barray       = 0;
2129   a->rstartbs     = oldmat->rstartbs;
2130   a->rendbs       = oldmat->rendbs;
2131   a->cstartbs     = oldmat->cstartbs;
2132   a->cendbs       = oldmat->cendbs;
2133 
2134   /* hash table stuff */
2135   a->ht           = 0;
2136   a->hd           = 0;
2137   a->ht_size      = 0;
2138   a->ht_flag      = oldmat->ht_flag;
2139   a->ht_fact      = oldmat->ht_fact;
2140   a->ht_total_ct  = 0;
2141   a->ht_insert_ct = 0;
2142 
2143   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2144   if (oldmat->colmap) {
2145 #if defined(PETSC_USE_CTABLE)
2146     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2147 #else
2148     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2149     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2150     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2151 #endif
2152   } else a->colmap = 0;
2153 
2154   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2155     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2156     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2157     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2158   } else a->garray = 0;
2159 
2160   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2161   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2162   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2163   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2164   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2165 
2166   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2167   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2168   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2169   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2170 
2171   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2172   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2173   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2174   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2175   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2176   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2177   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2178   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2179   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2180   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2181   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2182   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2183   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2184 
2185   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2186   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2187   a->sMvctx = oldmat->sMvctx;
2188   ierr      = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2189 
2190   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2191   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2192   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2193   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2194   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2195   *newmat = mat;
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 #undef __FUNCT__
2200 #define __FUNCT__ "MatLoad_MPISBAIJ"
2201 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2202 {
2203   PetscErrorCode ierr;
2204   PetscInt       i,nz,j,rstart,rend;
2205   PetscScalar    *vals,*buf;
2206   MPI_Comm       comm;
2207   MPI_Status     status;
2208   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2209   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2210   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2211   PetscInt       bs       =1,Mbs,mbs,extra_rows;
2212   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2213   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2214   int            fd;
2215 
2216   PetscFunctionBegin;
2217   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2218   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2219   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2220   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2221 
2222   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2223   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2224   if (!rank) {
2225     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2226     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2227     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2228     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2229   }
2230 
2231   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2232 
2233   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2234   M    = header[1];
2235   N    = header[2];
2236 
2237   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2238   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2239   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2240 
2241   /* If global sizes are set, check if they are consistent with that given in the file */
2242   if (sizesset) {
2243     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2244   }
2245   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
2246   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);
2247 
2248   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2249 
2250   /*
2251      This code adds extra rows to make sure the number of rows is
2252      divisible by the blocksize
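     (for example, M=10 with bs=4 gives Mbs=3 and extra_rows=2, so the padded matrix has 12 rows)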
2253   */
2254   Mbs        = M/bs;
2255   extra_rows = bs - M + bs*(Mbs);
2256   if (extra_rows == bs) extra_rows = 0;
2257   else                  Mbs++;
2258   if (extra_rows && !rank) {
2259     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2260   }
2261 
2262   /* determine ownership of all rows */
2263   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2264     mbs = Mbs/size + ((Mbs % size) > rank);
2265     m   = mbs*bs;
2266   } else { /* User Set */
2267     m   = newmat->rmap->n;
2268     mbs = m/bs;
2269   }
2270   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2271   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2272   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2273   rowners[0] = 0;
2274   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2275   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2276   rstart = rowners[rank];
2277   rend   = rowners[rank+1];
2278 
2279   /* distribute row lengths to all processors */
2280   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2281   if (!rank) {
2282     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2283     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2284     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2285     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2286     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2287     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2288     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2289   } else {
2290     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2291   }
2292 
2293   if (!rank) {   /* procs[0] */
2294     /* calculate the number of nonzeros on each processor */
2295     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2296     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2297     for (i=0; i<size; i++) {
2298       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2299         procsnz[i] += rowlengths[j];
2300       }
2301     }
2302     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2303 
2304     /* determine max buffer needed and allocate it */
2305     maxnz = 0;
2306     for (i=0; i<size; i++) {
2307       maxnz = PetscMax(maxnz,procsnz[i]);
2308     }
2309     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2310 
2311     /* read in my part of the matrix column indices  */
2312     nz     = procsnz[0];
2313     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2314     mycols = ibuf;
2315     if (size == 1) nz -= extra_rows;
2316     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2317     if (size == 1) {
2318       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2319     }
2320 
2321     /* read in everyone else's (except the last proc's) and ship them off */
2322     for (i=1; i<size-1; i++) {
2323       nz   = procsnz[i];
2324       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2325       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2326     }
2327     /* read in the stuff for the last proc */
2328     if (size != 1) {
2329       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2330       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2331       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2332       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2333     }
2334     ierr = PetscFree(cols);CHKERRQ(ierr);
2335   } else {  /* procs[i], i>0 */
2336     /* determine buffer space needed for message */
2337     nz = 0;
2338     for (i=0; i<m; i++) nz += locrowlens[i];
2339     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2340     mycols = ibuf;
2341     /* receive message of column indices*/
2342     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2343     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2344     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2345   }
2346 
2347   /* loop over local rows, determining number of off diagonal entries */
2348   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2349   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2350   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2351   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2352   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2353   rowcount = 0;
2354   nzcount  = 0;
2355   for (i=0; i<mbs; i++) {
2356     dcount  = 0;
2357     odcount = 0;
2358     for (j=0; j<bs; j++) {
2359       kmax = locrowlens[rowcount];
2360       for (k=0; k<kmax; k++) {
2361         tmp = mycols[nzcount++]/bs; /* block col. index */
2362         if (!mask[tmp]) {
2363           mask[tmp] = 1;
2364           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2365           else masked1[dcount++] = tmp; /* entry in diag portion */
2366         }
2367       }
2368       rowcount++;
2369     }
2370 
2371     dlens[i]  = dcount;  /* d_nzz[i] */
2372     odlens[i] = odcount; /* o_nzz[i] */
2373 
2374     /* zero out the mask elements we set */
2375     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2376     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2377   }
2378   if (!sizesset) {
2379     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2380   }
2381   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2382   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2383 
2384   if (!rank) {
2385     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2386     /* read in my part of the matrix numerical values  */
2387     nz     = procsnz[0];
2388     vals   = buf;
2389     mycols = ibuf;
2390     if (size == 1) nz -= extra_rows;
2391     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2392     if (size == 1) {
2393       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2394     }
2395 
2396     /* insert into matrix */
2397     jj = rstart*bs;
2398     for (i=0; i<m; i++) {
2399       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2400       mycols += locrowlens[i];
2401       vals   += locrowlens[i];
2402       jj++;
2403     }
2404 
2405     /* read in other processors (except the last one) and ship out */
2406     for (i=1; i<size-1; i++) {
2407       nz   = procsnz[i];
2408       vals = buf;
2409       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2410       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2411     }
2412     /* the last proc */
2413     if (size != 1) {
2414       nz   = procsnz[size-1] - extra_rows;
2415       vals = buf;
2416       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2417       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2418       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2419     }
2420     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2421 
2422   } else {
2423     /* receive numeric values */
2424     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2425 
2426     /* receive message of values*/
2427     vals   = buf;
2428     mycols = ibuf;
2429     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2430     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2431     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2432 
2433     /* insert into matrix */
2434     jj = rstart*bs;
2435     for (i=0; i<m; i++) {
2436       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2437       mycols += locrowlens[i];
2438       vals   += locrowlens[i];
2439       jj++;
2440     }
2441   }
2442 
2443   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2444   ierr = PetscFree(buf);CHKERRQ(ierr);
2445   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2446   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2447   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2448   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2449   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2450   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 #undef __FUNCT__
2455 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2456 /*XXXXX@
2457    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2458 
2459    Input Parameters:
2460 .  mat  - the matrix
2461 .  fact - factor
2462 
2463    Not Collective on Mat, each process can have a different hash factor
2464 
2465    Level: advanced
2466 
2467   Notes:
2468    This can also be set by the command line option: -mat_use_hash_table fact
2469 
2470 .keywords: matrix, hashtable, factor, HT
2471 
2472 .seealso: MatSetOption()
2473 @XXXXX*/
2474 
2475 
2476 #undef __FUNCT__
2477 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
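/* Since only the upper triangle is stored, an entry of the off-diagonal block B in global column j
   also contributes, by symmetry, to the maximum of row j, which is owned by a later process;
   those contributions are accumulated in work[] and shipped to their owners below */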
2478 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2479 {
2480   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2481   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2482   PetscReal      atmp;
2483   PetscReal      *work,*svalues,*rvalues;
2484   PetscErrorCode ierr;
2485   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2486   PetscMPIInt    rank,size;
2487   PetscInt       *rowners_bs,dest,count,source;
2488   PetscScalar    *va;
2489   MatScalar      *ba;
2490   MPI_Status     stat;
2491 
2492   PetscFunctionBegin;
2493   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2494   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2495   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2496 
2497   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2498   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2499 
2500   bs  = A->rmap->bs;
2501   mbs = a->mbs;
2502   Mbs = a->Mbs;
2503   ba  = b->a;
2504   bi  = b->i;
2505   bj  = b->j;
2506 
2507   /* find ownerships */
2508   rowners_bs = A->rmap->range;
2509 
2510   /* each proc creates an array to be distributed */
2511   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2512   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2513 
2514   /* row_max for B */
2515   if (rank != size-1) {
2516     for (i=0; i<mbs; i++) {
2517       ncols = bi[1] - bi[0]; bi++;
2518       brow  = bs*i;
2519       for (j=0; j<ncols; j++) {
2520         bcol = bs*(*bj);
2521         for (kcol=0; kcol<bs; kcol++) {
2522           col  = bcol + kcol;                /* local col index */
2523           col += rowners_bs[rank+1];      /* global col index */
2524           for (krow=0; krow<bs; krow++) {
2525             atmp = PetscAbsScalar(*ba); ba++;
2526             row  = brow + krow;   /* local row index */
2527             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2528             if (work[col] < atmp) work[col] = atmp;
2529           }
2530         }
2531         bj++;
2532       }
2533     }
2534 
2535     /* send values to its owners */
2536     for (dest=rank+1; dest<size; dest++) {
2537       svalues = work + rowners_bs[dest];
2538       count   = rowners_bs[dest+1]-rowners_bs[dest];
2539       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2540     }
2541   }
2542 
2543   /* receive values */
2544   if (rank) {
2545     rvalues = work;
2546     count   = rowners_bs[rank+1]-rowners_bs[rank];
2547     for (source=0; source<rank; source++) {
2548       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2549       /* process values */
2550       for (i=0; i<count; i++) {
2551         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2552       }
2553     }
2554   }
2555 
2556   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2557   ierr = PetscFree(work);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatSOR_MPISBAIJ"
2563 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2564 {
2565   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2566   PetscErrorCode    ierr;
2567   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2568   PetscScalar       *x,*ptr,*from;
2569   Vec               bb1;
2570   const PetscScalar *b;
2571 
2572   PetscFunctionBegin;
2573   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2574   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2575 
2576   if (flag == SOR_APPLY_UPPER) {
2577     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2578     PetscFunctionReturn(0);
2579   }
2580 
2581   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2582     if (flag & SOR_ZERO_INITIAL_GUESS) {
2583       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2584       its--;
2585     }
2586 
2587     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2588     while (its--) {
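      /* assemble bb1 = bb - (off-process part of the matrix)*xx using a single forward scatter
         through the combined work vectors slvec0/slvec1, then run a local symmetric sweep */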
2589 
2590       /* lower triangular part: slvec0b = - B^T*xx */
2591       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2592 
2593       /* copy xx into slvec0a */
2594       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2595       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2596       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2597       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2598 
2599       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2600 
2601       /* copy bb into slvec1a */
2602       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2603       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2604       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2605       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2606 
2607       /* set slvec1b = 0 */
2608       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2609 
2610       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2611       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2612       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2613       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2614 
2615       /* upper triangular part: bb1 = bb1 - B*x */
2616       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2617 
2618       /* local diagonal sweep */
2619       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2620     }
2621     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2622   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2623     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2624   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2625     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2626   } else if (flag & SOR_EISENSTAT) {
2627     Vec               xx1;
2628     PetscBool         hasop;
2629     const PetscScalar *diag;
2630     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2631     PetscInt          i,n;
2632 
2633     if (!mat->xx1) {
2634       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2635       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2636     }
2637     xx1 = mat->xx1;
2638     bb1 = mat->bb1;
2639 
2640     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2641 
2642     if (!mat->diag) {
2643       /* this is wrong for same matrix with new nonzero values */
2644       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2645       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2646     }
2647     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2648 
2649     if (hasop) {
2650       ierr = MatMultDiagonalBlock(matin,xx,mat->slvec1a);CHKERRQ(ierr);
2651       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2652     } else {
2653       /*
2654           These two lines are replaced by code that may be a bit faster for a good compiler
2655       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2656       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2657       */
2658       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2659       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2660       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2661       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2662       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2663       if (omega == 1.0) {
2664         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2665         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2666       } else {
2667         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2668         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2669       }
2670       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2671       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2672       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2673       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2674     }
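
    /*
       The single forward scatter on sMvctx below does double duty: it
       gathers ghost values of xx into slvec1b and simultaneously adds the
       off-process B^T*xx contributions held in slvec0b into slvec1a.
    */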

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local forward sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
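
/* MatSOR_MPISBAIJ() is reached through MatSOR() (for example from PCSOR or
   PCEISENSTAT) rather than being called directly; MatSOR_MPISBAIJ_2comm()
   below is an alternative implementation that communicates with two
   scatters per iteration instead of one. */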

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

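    /*
       Unlike MatSOR_MPISBAIJ(), this variant uses two scatters per
       iteration on a conventional communication context: a forward scatter
       to gather the ghost values of xx into lvec, and a reverse ADD scatter
       to return the B^T*xx contributions held in lvec1 to their owners.
    */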
    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*xx_ghost */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
     MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local
         rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size; only a block size of 1 is supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - number of local columns; this should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices (of length m+1; i[0] must be 0)
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i array holds the offsets into the local j array.

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
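
   Example:
     A sketch for bs = 1 on a single process; the values are illustrative,
     and only the upper triangle is supplied here, matching the SBAIJ
     convention of storing a symmetric matrix by its upper triangular part.
     For the symmetric tridiagonal matrix
.vb
        2 -1  0
       -1  2 -1
        0 -1  2
.ve
     the arrays are
.vb
     PetscInt    i[] = {0,2,4,5};     /* m+1 row starts, i[0] = 0           */
     PetscInt    j[] = {0,1,1,2,2};   /* column indices, sorted in each row */
     PetscScalar a[] = {2.0,-1.0,2.0,-1.0,2.0};

     ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve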
@*/
PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (block compressed row, with only the upper triangular portion of each block row stored).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
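
   Example (a sketch, assuming a single MPI rank and bs = 1; the 3x3
   symmetric tridiagonal matrix is illustrative, with only its upper
   triangle supplied):
.vb
   Mat         B;
   PetscInt    i[] = {0,2,4,5};       /* row starts, m+1 entries            */
   PetscInt    j[] = {0,1,1,2,2};     /* column indices, sorted in each row */
   PetscScalar v[] = {2.0,-1.0,2.0,-1.0,2.0};

   ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,3,3,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocationCSR(B,1,i,j,v);CHKERRQ(ierr);
.ve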
@*/
PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

2826