/* xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 7212da7c135ebda4e0bbb2601b2d797d12ac4dc2) */
#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
#include "../src/mat/impls/sbaij/seq/sbaij.h"
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
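
/*
   A minimal usage sketch (illustrative, not part of this file): the store/retrieve
   pair above saves and restores the numerical values of a matrix whose nonzero
   pattern is frozen, e.g. when the same pattern is refilled every nonlinear
   iteration. Assumes mat is an assembled MATMPISBAIJ:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);      -- save values of the A and B parts
     ... overwrite values, e.g. apply boundary conditions, factor, solve ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   -- restore the saved values
*/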


#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
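
/*
   Illustrative sketch of the convention above (assumes mat is an assembled-pattern
   MATMPISBAIJ with block size 1): an entry below the diagonal either errors or,
   with the option enabled, is silently dropped:

     PetscInt    i = 2, j = 1;
     PetscScalar v = 5.0;
     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);   -- dropped: i > j
     ierr = MatSetValues(mat,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);   -- kept: j <= i
*/
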
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar     *ba=b->a;

  PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar     *ap,*bap;

  /* for stash */
  PetscInt      n_loc, *in_loc = PETSC_NULL;
  MatScalar     *v_loc = PETSC_NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash){
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs){
          if (a->ignore_ltriangular){
            continue;    /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
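
/*
   Note on off-process entries: rows not owned by this process are queued in
   mat->stash above and only communicated during assembly, so every call sequence
   must end with the usual pair (sketch, any communicator layout):

     ierr = MatSetValues(mat,m,im,n,in,v,ADD_VALUES);CHKERRQ(ierr);   -- may target remote rows
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   -- ships the stash
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);     -- drains it via MatSetValues_MPISBAIJ()
*/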

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
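
/*
   Sketch of blocked insertion (assumes bs = 2; im/in are block row/column indices,
   not point indices, and with the default row-oriented storage the 2x2 block is
   passed row by row):

     PetscInt    ib = 0, jb = 1;                    -- block indices, ib <= jb
     PetscScalar blk[4] = {1.0,2.0,
                           3.0,4.0};                -- row-major 2x2 block
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,blk,INSERT_VALUES);CHKERRQ(ierr);
*/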

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
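
/*
   Usage note (sketch): MatGetValues() on this format only serves rows owned by the
   calling process; requesting a remote row raises PETSC_ERR_SUP. Off-process
   columns that are not in the stored pattern come back as 0.0 (see the colmap
   fallback above):

     PetscInt    row = rstart, col = 0;             -- rstart = first locally owned row
     PetscScalar val;
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/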

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;             /* square of norm(A) */
      ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
      ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
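
/*
   Why the factor of 2 in the Frobenius norm above: each process stores its
   diagonal block in baij->A and each off-diagonal block entry exactly once in
   baij->B, so with sum[0] = sum_p |A_p|_F^2 and sum[1] = sum_p |B_p|_F^2 the
   full symmetric matrix satisfies

       |mat|_F = sqrt(sum[0] + 2*sum[1])

   because every B entry represents the pair a(i,j) = a(j,i). This assumes the
   SeqSBAIJ Frobenius norm already accounts for the mirrored entries inside the
   diagonal block itself.
*/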

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode; since
     the two modes are distinct nonzero bits, the bitwise OR over all processes equals
     ADD_VALUES|INSERT_VALUES exactly when the modes are mixed */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash &&  !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->n,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
  if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
  if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
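
/*
   Shape of the symmetric multiply above (and its Hermitian variant): with D the
   locally owned diagonal block (a->A) and B the locally stored off-diagonal part
   (a->B), the global product is assembled as

     y = D*x + B*x_ghost + B^T*x

   slvec0 packs [x ; B^T*x], and a single forward scatter with ADD_VALUES both
   gathers the ghost values of x into slvec1b and accumulates the transpose
   contributions into the owning processes, so one communication round suffices.
*/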

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}
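
/*
   Sketch of the call sequence for examining rows of an assembled MATMPISBAIJ
   (only locally owned rows are available, and the upper-triangular accessors must
   bracket the row access; "row" is assumed to be a locally owned global row):

     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ... inspect ncols, cols[], vals[] ...
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/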

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set                         = PETSC_TRUE;
    A->spd                             = flg;
    if (flg) {
      A->symmetric                     = PETSC_TRUE;
      A->structurally_symmetric        = PETSC_TRUE;
      A->symmetric_set                 = PETSC_TRUE;
      A->structurally_symmetric_set    = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
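
/*
   Sketch: because only half of the symmetric matrix is stored, scaling must
   preserve symmetry, so MatDiagonalScale() above insists on equal left and right
   vectors and applies mat <- diag(ll) * mat * diag(ll):

     Vec d;                                 -- same layout as the rows of mat
     ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);
*/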

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1367 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1368 {
1369   PetscErrorCode ierr;
1370 
1371   PetscFunctionBegin;
1372   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); /* negative bs lets -mat_block_size override it */
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1378 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1379 {
1380   PetscErrorCode ierr;
1381   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1382   PetscBLASInt   bnz,one=1;
1383   Mat_SeqSBAIJ   *xa,*ya;
1384   Mat_SeqBAIJ    *xb,*yb;
1385 
1386   PetscFunctionBegin;
1387   if (str == SAME_NONZERO_PATTERN) {
1388     PetscScalar alpha = a;
1389     xa = (Mat_SeqSBAIJ *)xx->A->data;
1390     ya = (Mat_SeqSBAIJ *)yy->A->data;
1391     bnz = PetscBLASIntCast(xa->nz);
1392     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1393     xb = (Mat_SeqBAIJ *)xx->B->data;
1394     yb = (Mat_SeqBAIJ *)yy->B->data;
1395     bnz = PetscBLASIntCast(xb->nz);
1396     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1397   } else {
1398     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1399     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1400     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1401   }
1402   PetscFunctionReturn(0);
1403 }
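
/*
   Usage sketch (illustrative): when Y and X were preallocated identically, the fast
   BLAS path above applies and avoids a row-by-row copy:

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);    computes Y = 2*X + Y
*/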
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1407 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1408 {
1409   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1410   PetscInt        rbs,cbs;
1411   PetscErrorCode  ierr;
1412 
1413   PetscFunctionBegin;
1414   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1415   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1416   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1417   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1418   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot set block size %D; row block size is already %D",bs,rbs);
1419   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot set block size %D; column block size is already %D",bs,cbs);
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1425 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1426 {
1427   PetscErrorCode ierr;
1428   PetscInt       i;
1429   PetscTruth     flg;
1430 
1431   PetscFunctionBegin;
1432   for (i=0; i<n; i++) {
1433     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1434     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1435   }
1436   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
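
/*
   Usage sketch (illustrative; "is" and "submats" are hypothetical names): the same
   index set must be passed for rows and columns so that each extracted block is a
   symmetric (diagonal) submatrix:

      Mat *submats;
      ierr = MatGetSubMatrices(A,1,&is,&is,MAT_INITIAL_MATRIX,&submats);CHKERRQ(ierr);
*/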
1439 
1440 
1441 /* -------------------------------------------------------------------*/
1442 static struct _MatOps MatOps_Values = {
1443        MatSetValues_MPISBAIJ,
1444        MatGetRow_MPISBAIJ,
1445        MatRestoreRow_MPISBAIJ,
1446        MatMult_MPISBAIJ,
1447 /* 4*/ MatMultAdd_MPISBAIJ,
1448        MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose ones */
1449        MatMultAdd_MPISBAIJ,
1450        0,
1451        0,
1452        0,
1453 /*10*/ 0,
1454        0,
1455        0,
1456        MatSOR_MPISBAIJ,
1457        MatTranspose_MPISBAIJ,
1458 /*15*/ MatGetInfo_MPISBAIJ,
1459        MatEqual_MPISBAIJ,
1460        MatGetDiagonal_MPISBAIJ,
1461        MatDiagonalScale_MPISBAIJ,
1462        MatNorm_MPISBAIJ,
1463 /*20*/ MatAssemblyBegin_MPISBAIJ,
1464        MatAssemblyEnd_MPISBAIJ,
1465        MatSetOption_MPISBAIJ,
1466        MatZeroEntries_MPISBAIJ,
1467 /*24*/ 0,
1468        0,
1469        0,
1470        0,
1471        0,
1472 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1473        0,
1474        0,
1475        0,
1476        0,
1477 /*34*/ MatDuplicate_MPISBAIJ,
1478        0,
1479        0,
1480        0,
1481        0,
1482 /*39*/ MatAXPY_MPISBAIJ,
1483        MatGetSubMatrices_MPISBAIJ,
1484        MatIncreaseOverlap_MPISBAIJ,
1485        MatGetValues_MPISBAIJ,
1486        MatCopy_MPISBAIJ,
1487 /*44*/ 0,
1488        MatScale_MPISBAIJ,
1489        0,
1490        0,
1491        0,
1492 /*49*/ MatSetBlockSize_MPISBAIJ,
1493        0,
1494        0,
1495        0,
1496        0,
1497 /*54*/ 0,
1498        0,
1499        MatSetUnfactored_MPISBAIJ,
1500        0,
1501        MatSetValuesBlocked_MPISBAIJ,
1502 /*59*/ 0,
1503        0,
1504        0,
1505        0,
1506        0,
1507 /*64*/ 0,
1508        0,
1509        0,
1510        0,
1511        0,
1512 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1513        0,
1514        0,
1515        0,
1516        0,
1517 /*74*/ 0,
1518        0,
1519        0,
1520        0,
1521        0,
1522 /*79*/ 0,
1523        0,
1524        0,
1525        0,
1526        MatLoad_MPISBAIJ,
1527 /*84*/ 0,
1528        0,
1529        0,
1530        0,
1531        0,
1532 /*89*/ 0,
1533        0,
1534        0,
1535        0,
1536        0,
1537 /*94*/ 0,
1538        0,
1539        0,
1540        0,
1541        0,
1542 /*99*/ 0,
1543        0,
1544        0,
1545        0,
1546        0,
1547 /*104*/0,
1548        MatRealPart_MPISBAIJ,
1549        MatImaginaryPart_MPISBAIJ,
1550        MatGetRowUpperTriangular_MPISBAIJ,
1551        MatRestoreRowUpperTriangular_MPISBAIJ,
1552 /*109*/0,
1553        0,
1554        0,
1555        0,
1556        0,
1557 /*114*/0,
1558        0,
1559        0,
1560        0,
1561        0,
1562 /*119*/0,
1563        0,
1564        0,
1565        0
1566 };
1567 
1568 
1569 EXTERN_C_BEGIN
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1572 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1573 {
1574   PetscFunctionBegin;
1575   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1576   *iscopy = PETSC_FALSE;
1577   PetscFunctionReturn(0);
1578 }
1579 EXTERN_C_END
1580 
1581 EXTERN_C_BEGIN
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1584 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1585 {
1586   Mat_MPISBAIJ   *b;
1587   PetscErrorCode ierr;
1588   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1589 
1590   PetscFunctionBegin;
1591   if (bs < 0){
1592     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1593       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1594     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1595     bs   = PetscAbs(bs);
1596   }
1597   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1598   bs = newbs;
1599 
1600   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1601   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1602   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1603   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1604 
1605   B->rmap->bs = B->cmap->bs = bs;
1606   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1607   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1608 
1609   if (d_nnz) {
1610     for (i=0; i<B->rmap->n/bs; i++) {
1611       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1612     }
1613   }
1614   if (o_nnz) {
1615     for (i=0; i<B->rmap->n/bs; i++) {
1616       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1617     }
1618   }
1619 
1620   b   = (Mat_MPISBAIJ*)B->data;
1621   mbs = B->rmap->n/bs;
1622   Mbs = B->rmap->N/bs;
1623   if (mbs*bs != B->rmap->n) {
1624     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1625   }
1626 
1627   B->rmap->bs  = bs;
1628   b->bs2 = bs*bs;
1629   b->mbs = mbs;
1630   b->nbs = mbs;
1631   b->Mbs = Mbs;
1632   b->Nbs = Mbs;
1633 
1634   for (i=0; i<=b->size; i++) {
1635     b->rangebs[i] = B->rmap->range[i]/bs;
1636   }
1637   b->rstartbs = B->rmap->rstart/bs;
1638   b->rendbs   = B->rmap->rend/bs;
1639 
1640   b->cstartbs = B->cmap->rstart/bs;
1641   b->cendbs   = B->cmap->rend/bs;
1642 
1643   if (!B->preallocated) {
1644     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1645     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1646     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1647     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1648     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1649     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1650     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1651     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1652     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1653   }
1654 
1655   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1656   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1657   B->preallocated = PETSC_TRUE;
1658   PetscFunctionReturn(0);
1659 }
1660 EXTERN_C_END
1661 
1662 EXTERN_C_BEGIN
1663 #if defined(PETSC_HAVE_MUMPS)
1664 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1665 #endif
1666 #if defined(PETSC_HAVE_SPOOLES)
1667 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1668 #endif
1669 #if defined(PETSC_HAVE_PASTIX)
1670 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1671 #endif
1672 EXTERN_C_END
1673 
1674 /*MC
1675    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1676    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1677 
1678    Options Database Keys:
1679 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1680 
1681   Level: beginner
1682 
1683 .seealso: MatCreateMPISBAIJ
1684 M*/
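
/*
   Illustrative sketch of typical use (assumed usage, not taken from the original
   source):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);

   or select the type at runtime with -mat_type mpisbaij and MatSetFromOptions().
*/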
1685 
1686 EXTERN_C_BEGIN
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatCreate_MPISBAIJ"
1689 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1690 {
1691   Mat_MPISBAIJ   *b;
1692   PetscErrorCode ierr;
1693   PetscTruth     flg;
1694 
1695   PetscFunctionBegin;
1696 
1697   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1698   B->data = (void*)b;
1699   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1700 
1701   B->ops->destroy    = MatDestroy_MPISBAIJ;
1702   B->ops->view       = MatView_MPISBAIJ;
1703   B->mapping         = 0;
1704   B->assembled       = PETSC_FALSE;
1705 
1706   B->insertmode = NOT_SET_VALUES;
1707   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1708   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1709 
1710   /* build local table of row and column ownerships */
1711   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1712 
1713   /* build cache for off array entries formed */
1714   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1715   b->donotstash  = PETSC_FALSE;
1716   b->colmap      = PETSC_NULL;
1717   b->garray      = PETSC_NULL;
1718   b->roworiented = PETSC_TRUE;
1719 
1720   /* stuff used in block assembly */
1721   b->barray       = 0;
1722 
1723   /* stuff used for matrix vector multiply */
1724   b->lvec         = 0;
1725   b->Mvctx        = 0;
1726   b->slvec0       = 0;
1727   b->slvec0b      = 0;
1728   b->slvec1       = 0;
1729   b->slvec1a      = 0;
1730   b->slvec1b      = 0;
1731   b->sMvctx       = 0;
1732 
1733   /* stuff for MatGetRow() */
1734   b->rowindices   = 0;
1735   b->rowvalues    = 0;
1736   b->getrowactive = PETSC_FALSE;
1737 
1738   /* hash table stuff */
1739   b->ht           = 0;
1740   b->hd           = 0;
1741   b->ht_size      = 0;
1742   b->ht_flag      = PETSC_FALSE;
1743   b->ht_fact      = 0;
1744   b->ht_total_ct  = 0;
1745   b->ht_insert_ct = 0;
1746 
1747   b->in_loc       = 0;
1748   b->v_loc        = 0;
1749   b->n_loc        = 0;
1750   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1751     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1752     if (flg) {
1753       PetscReal fact = 1.39;
1754       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1755       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1756       if (fact <= 1.0) fact = 1.39;
1757       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1758       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1759     }
1760   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1761 
1762 #if defined(PETSC_HAVE_PASTIX)
1763   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1764 					   "MatGetFactor_mpisbaij_pastix",
1765 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1766 #endif
1767 #if defined(PETSC_HAVE_MUMPS)
1768   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1769                                      "MatGetFactor_sbaij_mumps",
1770                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1771 #endif
1772 #if defined(PETSC_HAVE_SPOOLES)
1773   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1774                                      "MatGetFactor_mpisbaij_spooles",
1775                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1776 #endif
1777   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1778                                      "MatStoreValues_MPISBAIJ",
1779                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1780   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1781                                      "MatRetrieveValues_MPISBAIJ",
1782                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1783   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1784                                      "MatGetDiagonalBlock_MPISBAIJ",
1785                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1786   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1787                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1788                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1789   B->symmetric                  = PETSC_TRUE;
1790   B->structurally_symmetric     = PETSC_TRUE;
1791   B->symmetric_set              = PETSC_TRUE;
1792   B->structurally_symmetric_set = PETSC_TRUE;
1793   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1794   PetscFunctionReturn(0);
1795 }
1796 EXTERN_C_END
1797 
1798 /*MC
1799    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1800 
1801    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1802    and MATMPISBAIJ otherwise.
1803 
1804    Options Database Keys:
1805 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1806 
1807   Level: beginner
1808 
1809 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1810 M*/
1811 
1812 EXTERN_C_BEGIN
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatCreate_SBAIJ"
1815 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1816 {
1817   PetscErrorCode ierr;
1818   PetscMPIInt    size;
1819 
1820   PetscFunctionBegin;
1821   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1822   if (size == 1) {
1823     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1824   } else {
1825     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1826   }
1827   PetscFunctionReturn(0);
1828 }
1829 EXTERN_C_END
1830 
1831 #undef __FUNCT__
1832 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1833 /*@C
1834    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1835    the user should preallocate the matrix storage by setting the parameters
1836    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1837    performance can be increased by more than a factor of 50.
1838 
1839    Collective on Mat
1840 
1841    Input Parameters:
1842 +  A - the matrix
1843 .  bs   - size of block
1844 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1845            submatrix  (same for all local rows)
1846 .  d_nnz - array containing the number of block nonzeros in the various block rows
1847            in the upper triangular and diagonal part of the diagonal portion of the local
1848            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1849            for the diagonal entry even if it is zero.
1850 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1851            submatrix (same for all local rows).
1852 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1853            off-diagonal portion of the local submatrix (possibly different for
1854            each block row) or PETSC_NULL.
1855 
1856 
1857    Options Database Keys:
1858 +   -mat_no_unroll - uses code that does not unroll the loops in the
1859                      block calculations (much slower)
1860 -   -mat_block_size - size of the blocks to use
1861 
1862    Notes:
1863 
1864    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1865    then it must be used on all processors that share the object for that argument.
1866 
1867    If the *_nnz parameter is given then the *_nz parameter is ignored
1868 
1869    Storage Information:
1870    For a square global matrix we define each processor's diagonal portion
1871    to be its local rows and the corresponding columns (a square submatrix);
1872    each processor's off-diagonal portion encompasses the remainder of the
1873    local matrix (a rectangular submatrix).
1874 
1875    The user can specify preallocated storage for the diagonal part of
1876    the local submatrix with either d_nz or d_nnz (not both).  Set
1877    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1878    memory allocation.  Likewise, specify preallocated storage for the
1879    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1880 
1881    You can call MatGetInfo() to get information on how effective the preallocation was;
1882    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1883    You can also run with the option -info and look for messages with the string
1884    malloc in them to see if additional memory allocation was needed.
1885 
1886    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1887    the figure below we depict these three local rows and all columns (0-11).
1888 
1889 .vb
1890            0 1 2 3 4 5 6 7 8 9 10 11
1891           -------------------
1892    row 3  |  o o o d d d o o o o o o
1893    row 4  |  o o o d d d o o o o o o
1894    row 5  |  o o o d d d o o o o o o
1895           -------------------
1896 .ve
1897 
1898    Thus, any entries in the d locations are stored in the d (diagonal)
1899    submatrix, and any entries in the o locations are stored in the
1900    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1901    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1902 
1903    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1904    plus the diagonal part of the d matrix,
1905    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1906    In general, for PDE problems in which most nonzeros are near the diagonal,
1907    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1908    or you will get TERRIBLE performance; see the users' manual chapter on
1909    matrices.
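
   As an illustration (assuming every d and o location depicted above holds a
   nonzero, with bs=1), the preallocation for this processor would be
.vb
   d_nnz = [3, 2, 1]    row 3: cols 3,4,5;  row 4: cols 4,5;  row 5: col 5
   o_nnz = [6, 6, 6]    cols 6-11 in each row; cols 0-2 lie below the diagonal
                        and are not stored in the symmetric format
.ve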
1910 
1911    Level: intermediate
1912 
1913 .keywords: matrix, block, aij, compressed row, sparse, parallel
1914 
1915 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1916 @*/
1917 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1918 {
1919   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1920 
1921   PetscFunctionBegin;
1922   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1923   if (f) {
1924     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1925   }
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatCreateMPISBAIJ"
1931 /*@C
1932    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1933    (block compressed row).  For good matrix assembly performance
1934    the user should preallocate the matrix storage by setting the parameters
1935    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1936    performance can be increased by more than a factor of 50.
1937 
1938    Collective on MPI_Comm
1939 
1940    Input Parameters:
1941 +  comm - MPI communicator
1942 .  bs   - size of block
1943 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1944            This value should be the same as the local size used in creating the
1945            y vector for the matrix-vector product y = Ax.
1946 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1947            This value should be the same as the local size used in creating the
1948            x vector for the matrix-vector product y = Ax.
1949 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1950 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1951 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1952            submatrix  (same for all local rows)
1953 .  d_nnz - array containing the number of block nonzeros in the various block rows
1954            in the upper triangular and diagonal part of the diagonal portion of the local
1955            submatrix (possibly different for each block row) or PETSC_NULL.
1956            You must leave room for the diagonal entry even if it is zero.
1957 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1958            submatrix (same for all local rows).
1959 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1960            off-diagonal portion of the local submatrix (possibly different for
1961            each block row) or PETSC_NULL.
1962 
1963    Output Parameter:
1964 .  A - the matrix
1965 
1966    Options Database Keys:
1967 +   -mat_no_unroll - uses code that does not unroll the loops in the
1968                      block calculations (much slower)
1969 .   -mat_block_size - size of the blocks to use
1970 -   -mat_mpi - use the parallel matrix data structures even on one processor
1971                (defaults to using SeqBAIJ format on one processor)
1972 
1973    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1974    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1975    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1976 
1977    Notes:
1978    The number of rows and columns must be divisible by blocksize.
1979    This matrix type does not support complex Hermitian operations.
1980 
1981    The user MUST specify either the local or global matrix dimensions
1982    (possibly both).
1983 
1984    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1985    then it must be used on all processors that share the object for that argument.
1986 
1987    If the *_nnz parameter is given then the *_nz parameter is ignored
1988 
1989    Storage Information:
1990    For a square global matrix we define each processor's diagonal portion
1991    to be its local rows and the corresponding columns (a square submatrix);
1992    each processor's off-diagonal portion encompasses the remainder of the
1993    local matrix (a rectangular submatrix).
1994 
1995    The user can specify preallocated storage for the diagonal part of
1996    the local submatrix with either d_nz or d_nnz (not both).  Set
1997    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1998    memory allocation.  Likewise, specify preallocated storage for the
1999    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2000 
2001    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2002    the figure below we depict these three local rows and all columns (0-11).
2003 
2004 .vb
2005            0 1 2 3 4 5 6 7 8 9 10 11
2006           -------------------
2007    row 3  |  o o o d d d o o o o o o
2008    row 4  |  o o o d d d o o o o o o
2009    row 5  |  o o o d d d o o o o o o
2010           -------------------
2011 .ve
2012 
2013    Thus, any entries in the d locations are stored in the d (diagonal)
2014    submatrix, and any entries in the o locations are stored in the
2015    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2016    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2017 
2018    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2019    plus the diagonal part of the d matrix,
2020    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2021    In general, for PDE problems in which most nonzeros are near the diagonal,
2022    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2023    or you will get TERRIBLE performance; see the users' manual chapter on
2024    matrices.
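
   A minimal creation sketch for the layout above (illustrative; bs=1, three local
   rows, 12 global rows, using the flat d_nz/o_nz counts):
.vb
   Mat A;
   ierr = MatCreateMPISBAIJ(comm,1,3,3,12,12,3,PETSC_NULL,6,PETSC_NULL,&A);CHKERRQ(ierr);
   ...  MatSetValues() on the upper triangular entries, MatAssemblyBegin/End()  ...
   ierr = MatDestroy(A);CHKERRQ(ierr);
.ve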
2025 
2026    Level: intermediate
2027 
2028 .keywords: matrix, block, aij, compressed row, sparse, parallel
2029 
2030 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2031 @*/
2032 
2033 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2034 {
2035   PetscErrorCode ierr;
2036   PetscMPIInt    size;
2037 
2038   PetscFunctionBegin;
2039   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2040   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2041   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2042   if (size > 1) {
2043     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2044     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2045   } else {
2046     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2047     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2048   }
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 
2053 #undef __FUNCT__
2054 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2055 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2056 {
2057   Mat            mat;
2058   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2059   PetscErrorCode ierr;
2060   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2061   PetscScalar    *array;
2062 
2063   PetscFunctionBegin;
2064   *newmat       = 0;
2065   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2066   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2067   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2068   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2069   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2070   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2071 
2072   mat->factortype   = matin->factortype;
2073   mat->preallocated = PETSC_TRUE;
2074   mat->assembled    = PETSC_TRUE;
2075   mat->insertmode   = NOT_SET_VALUES;
2076 
2077   a = (Mat_MPISBAIJ*)mat->data;
2078   a->bs2   = oldmat->bs2;
2079   a->mbs   = oldmat->mbs;
2080   a->nbs   = oldmat->nbs;
2081   a->Mbs   = oldmat->Mbs;
2082   a->Nbs   = oldmat->Nbs;
2083 
2084 
2085   a->size         = oldmat->size;
2086   a->rank         = oldmat->rank;
2087   a->donotstash   = oldmat->donotstash;
2088   a->roworiented  = oldmat->roworiented;
2089   a->rowindices   = 0;
2090   a->rowvalues    = 0;
2091   a->getrowactive = PETSC_FALSE;
2092   a->barray       = 0;
2093   a->rstartbs    = oldmat->rstartbs;
2094   a->rendbs      = oldmat->rendbs;
2095   a->cstartbs    = oldmat->cstartbs;
2096   a->cendbs      = oldmat->cendbs;
2097 
2098   /* hash table stuff */
2099   a->ht           = 0;
2100   a->hd           = 0;
2101   a->ht_size      = 0;
2102   a->ht_flag      = oldmat->ht_flag;
2103   a->ht_fact      = oldmat->ht_fact;
2104   a->ht_total_ct  = 0;
2105   a->ht_insert_ct = 0;
2106 
2107   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2108   if (oldmat->colmap) {
2109 #if defined (PETSC_USE_CTABLE)
2110     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2111 #else
2112     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2113     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2115 #endif
2116   } else a->colmap = 0;
2117 
2118   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2119     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2120     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2121     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2122   } else a->garray = 0;
2123 
2124   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2125   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2126   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2127   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2128   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2129 
2130   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2131   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2132   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2133   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2134 
2135   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2136   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2137   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2138   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2139   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2140   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2141   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2142   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2143   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2144   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2145   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2146   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2147   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2148 
2149   /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet; instead share the scatter by taking a reference: */
2150   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2151   a->sMvctx = oldmat->sMvctx;
2152   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2153 
2154   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2155   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2156   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2157   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2158   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2159   *newmat = mat;
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 #undef __FUNCT__
2164 #define __FUNCT__ "MatLoad_MPISBAIJ"
2165 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2166 {
2167   PetscErrorCode ierr;
2168   PetscInt       i,nz,j,rstart,rend;
2169   PetscScalar    *vals,*buf;
2170   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2171   MPI_Status     status;
2172   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2173   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2174   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2175   PetscInt       bs=1,Mbs,mbs,extra_rows;
2176   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2177   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2178   int            fd;
2179 
2180   PetscFunctionBegin;
2181   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2182     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2183   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2184 
2185   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2186   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2187   if (!rank) {
2188     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2189     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2190     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2191     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2192   }
2193 
2194   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2195 
2196   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2197   M = header[1]; N = header[2];
2198 
2199   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2200   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2201   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2202 
2203   /* If global sizes are set, check if they are consistent with that given in the file */
2204   if (sizesset) {
2205     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2206   }
2207   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2208   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2209 
2210   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2211 
2212   /*
2213      This code adds extra rows to make sure the number of rows is
2214      divisible by the blocksize
2215   */
2216   Mbs        = M/bs;
2217   extra_rows = bs - M + bs*(Mbs);
2218   if (extra_rows == bs) extra_rows = 0;
2219   else                  Mbs++;
2220   if (extra_rows && !rank) {
2221     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2222   }
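
  /*
     Worked example of the padding (illustrative): M=10 with bs=4 gives Mbs=2 and
     extra_rows = 4 - 10 + 8 = 2, so Mbs becomes 3 and two extra rows, given a unit
     diagonal further below, pad the matrix out to 12 rows.
  */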
2223 
2224   /* determine ownership of all rows */
2225   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2226     mbs        = Mbs/size + ((Mbs % size) > rank);
2227     m          = mbs*bs;
2228   } else { /* User Set */
2229     m          = newmat->rmap->n;
2230     mbs        = m/bs;
2231   }
2232   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2233   mmbs       = PetscMPIIntCast(mbs);
2234   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2235   rowners[0] = 0;
2236   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2237   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2238   rstart = rowners[rank];
2239   rend   = rowners[rank+1];
2240 
2241   /* distribute row lengths to all processors */
2242   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2243   if (!rank) {
2244     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2245     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2246     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2247     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2248     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2249     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2250     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2251   } else {
2252     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2253   }
2254 
2255   if (!rank) {   /* procs[0] */
2256     /* calculate the number of nonzeros on each processor */
2257     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2258     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2259     for (i=0; i<size; i++) {
2260       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2261         procsnz[i] += rowlengths[j];
2262       }
2263     }
2264     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2265 
2266     /* determine max buffer needed and allocate it */
2267     maxnz = 0;
2268     for (i=0; i<size; i++) {
2269       maxnz = PetscMax(maxnz,procsnz[i]);
2270     }
2271     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2272 
2273     /* read in my part of the matrix column indices  */
2274     nz     = procsnz[0];
2275     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2276     mycols = ibuf;
2277     if (size == 1)  nz -= extra_rows;
2278     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2279     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2280 
2281     /* read in everyone else's (except the last proc's) and ship off */
2282     for (i=1; i<size-1; i++) {
2283       nz   = procsnz[i];
2284       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2285       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2286     }
2287     /* read in the stuff for the last proc */
2288     if (size != 1) {
2289       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2290       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2291       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2292       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2293     }
2294     ierr = PetscFree(cols);CHKERRQ(ierr);
2295   } else {  /* procs[i], i>0 */
2296     /* determine buffer space needed for message */
2297     nz = 0;
2298     for (i=0; i<m; i++) {
2299       nz += locrowlens[i];
2300     }
2301     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2302     mycols = ibuf;
2303     /* receive message of column indices*/
2304     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2305     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2306     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2307   }
2308 
2309   /* loop over local rows, determining number of off diagonal entries */
2310   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2311   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2312   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2313   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2314   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2315   rowcount = 0;
2316   nzcount  = 0;
2317   for (i=0; i<mbs; i++) {
2318     dcount  = 0;
2319     odcount = 0;
2320     for (j=0; j<bs; j++) {
2321       kmax = locrowlens[rowcount];
2322       for (k=0; k<kmax; k++) {
2323         tmp = mycols[nzcount++]/bs; /* block col. index */
2324         if (!mask[tmp]) {
2325           mask[tmp] = 1;
2326           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2327           else masked1[dcount++] = tmp; /* entry in diag portion */
2328         }
2329       }
2330       rowcount++;
2331     }
2332 
2333     dlens[i]  = dcount;  /* d_nzz[i] */
2334     odlens[i] = odcount; /* o_nzz[i] */
2335 
2336     /* zero out the mask elements we set */
2337     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2338     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2339   }
2340   if (!sizesset) {
2341     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2342   }
2343   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2344   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2345 
2346   if (!rank) {
2347     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2348     /* read in my part of the matrix numerical values  */
2349     nz = procsnz[0];
2350     vals = buf;
2351     mycols = ibuf;
2352     if (size == 1)  nz -= extra_rows;
2353     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2354     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2355 
2356     /* insert into matrix */
2357     jj      = rstart*bs;
2358     for (i=0; i<m; i++) {
2359       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2360       mycols += locrowlens[i];
2361       vals   += locrowlens[i];
2362       jj++;
2363     }
2364 
2365     /* read in other processors (except the last one) and ship out */
2366     for (i=1; i<size-1; i++) {
2367       nz   = procsnz[i];
2368       vals = buf;
2369       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2370       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2371     }
2372     /* the last proc */
2373     if (size != 1){
2374       nz   = procsnz[i] - extra_rows;
2375       vals = buf;
2376       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2377       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2378       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2379     }
2380     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2381 
2382   } else {
2383     /* receive numeric values */
2384     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2385 
2386     /* receive message of values*/
2387     vals   = buf;
2388     mycols = ibuf;
2389     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2390     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2391     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2392 
2393     /* insert into matrix */
2394     jj      = rstart*bs;
2395     for (i=0; i<m; i++) {
2396       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2397       mycols += locrowlens[i];
2398       vals   += locrowlens[i];
2399       jj++;
2400     }
2401   }
2402 
2403   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2404   ierr = PetscFree(buf);CHKERRQ(ierr);
2405   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2406   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2407   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2408   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2409   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2410   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 #undef __FUNCT__
2415 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2416 /*XXXXX@
2417    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2418 
2419    Input Parameters:
2420 .  mat  - the matrix
2421 .  fact - factor
2422 
2423    Not Collective on Mat, each process can have a different hash factor
2424 
2425    Level: advanced
2426 
2427   Notes:
2428    This can also be set by the command line option: -mat_use_hash_table fact
2429 
2430 .keywords: matrix, hashtable, factor, HT
2431 
2432 .seealso: MatSetOption()
2433 @XXXXX*/
2434 
2435 
2436 #undef __FUNCT__
2437 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2438 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2439 {
2440   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2441   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2442   PetscReal      atmp;
2443   PetscReal      *work,*svalues,*rvalues;
2444   PetscErrorCode ierr;
2445   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2446   PetscMPIInt    rank,size;
2447   PetscInt       *rowners_bs,dest,count,source;
2448   PetscScalar    *va;
2449   MatScalar      *ba;
2450   MPI_Status     stat;
2451 
2452   PetscFunctionBegin;
2453   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2454   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2455   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2456 
2457   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2458   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2459 
2460   bs   = A->rmap->bs;
2461   mbs  = a->mbs;
2462   Mbs  = a->Mbs;
2463   ba   = b->a;
2464   bi   = b->i;
2465   bj   = b->j;
2466 
2467   /* find ownerships */
2468   rowners_bs = A->rmap->range;
2469 
2470   /* each proc creates an array to be distributed */
2471   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2472   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2473 
2474   /* row_max for B */
2475   if (rank != size-1){
2476     for (i=0; i<mbs; i++) {
2477       ncols = bi[1] - bi[0]; bi++;
2478       brow  = bs*i;
2479       for (j=0; j<ncols; j++){
2480         bcol = bs*(*bj);
2481         for (kcol=0; kcol<bs; kcol++){
2482           col = bcol + kcol;                 /* local col index */
2483           col += rowners_bs[rank+1];      /* global col index */
2484           for (krow=0; krow<bs; krow++){
2485             atmp = PetscAbsScalar(*ba); ba++;
2486             row = brow + krow;    /* local row index */
2487             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2488             if (work[col] < atmp) work[col] = atmp;
2489           }
2490         }
2491         bj++;
2492       }
2493     }
2494 
2495     /* send values to their owners */
2496     for (dest=rank+1; dest<size; dest++){
2497       svalues = work + rowners_bs[dest];
2498       count   = rowners_bs[dest+1]-rowners_bs[dest];
2499       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2500     }
2501   }
2502 
2503   /* receive values */
2504   if (rank){
2505     rvalues = work;
2506     count   = rowners_bs[rank+1]-rowners_bs[rank];
2507     for (source=0; source<rank; source++){
2508       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2509       /* process values */
2510       for (i=0; i<count; i++){
2511         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2512       }
2513     }
2514   }
2515 
2516   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2517   ierr = PetscFree(work);CHKERRQ(ierr);
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 #undef __FUNCT__
2522 #define __FUNCT__ "MatSOR_MPISBAIJ"
2523 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2524 {
2525   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2526   PetscErrorCode    ierr;
2527   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2528   PetscScalar       *x,*ptr,*from;
2529   Vec               bb1;
2530   const PetscScalar *b;
2531 
2532   PetscFunctionBegin;
2533   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2534   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2535 
2536   if (flag == SOR_APPLY_UPPER) {
2537     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2538     PetscFunctionReturn(0);
2539   }
2540 
2541   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2542     if (flag & SOR_ZERO_INITIAL_GUESS) {
2543       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2544       its--;
2545     }
2546 
2547     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2548     while (its--){
2549 
2550       /* lower triangular part: slvec0b = - B^T*xx */
2551       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2552 
2553       /* copy xx into slvec0a */
2554       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2555       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2556       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2557       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2558 
2559       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2560 
2561       /* copy bb into slvec1a */
2562       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2563       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2564       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2565       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2566 
2567       /* set slvec1b = 0 */
2568       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2569 
2570       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2571       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2572       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2573       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2574 
2575       /* upper triangular part: bb1 = slvec1a + B*slvec1b, i.e. bb minus all off-diagonal contributions of x */
2576       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2577 
2578       /* local diagonal sweep */
2579       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2580     }
2581     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2582   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2583     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2584   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2585     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2586   } else if (flag & SOR_EISENSTAT) {
2587     Vec               xx1;
2588     PetscTruth        hasop;
2589     const PetscScalar *diag;
2590     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2591     PetscInt          i,n;
2592 
2593     if (!mat->xx1) {
2594       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2595       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2596     }
2597     xx1 = mat->xx1;
2598     bb1 = mat->bb1;
2599 
2600     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2601 
2602     if (!mat->diag) {
2603       /* note: the cached diagonal is wrong if the same matrix is refilled with new nonzero values */
2604       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2605       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2606     }
2607     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2608 
2609     if (hasop) {
2610       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2611       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2612     } else {
2613       /*
2614           These two lines are replaced by code that may be a bit faster for a good compiler
2615       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2616       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2617       */
2618       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2619       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2620       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2621       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2622       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2623       if (omega == 1.0) {
2624 	for (i=0; i<n; i++) {
2625 	  sl[i] = b[i] - diag[i]*x[i];
2626 	}
2627         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2628       } else {
2629 	for (i=0; i<n; i++) {
2630 	  sl[i] = b[i] + scale*diag[i]*x[i];
2631 	}
2632         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2633       }
2634       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2635       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2636       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2637       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2638     }
2639 
2640     /* multiply off-diagonal portion of matrix */
2641     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2642     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2643     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2644     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2645     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2646     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2647     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2648     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2649     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2650     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2651 
2652     /* local sweep */
2653     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2654     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2655   } else {
2656     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2657   }
2658   PetscFunctionReturn(0);
2659 }
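
/*
   Usage note (illustrative): this kernel is normally reached through the SOR
   preconditioner rather than called directly, e.g. with the runtime options

      -ksp_type richardson -pc_type sor

   and, as checked above, only block size 1 is currently supported.
*/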
2660 
2661 #undef __FUNCT__
2662 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2663 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2664 {
2665   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2666   PetscErrorCode ierr;
2667   Vec            lvec1,bb1;
2668 
2669   PetscFunctionBegin;
2670   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2671   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2672 
2673   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2674     if (flag & SOR_ZERO_INITIAL_GUESS) {
2675       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2676       its--;
2677     }
2678 
2679     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2680     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2681     while (its--){
2682       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2683 
2684       /* lower diagonal part: bb1 = bb - B^T*xx */
2685       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2686       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2687 
2688       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2689       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2690       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2691 
2692       /* upper diagonal part: bb1 = bb1 - B*x */
2693       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2694       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2695 
2696       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2697 
2698       /* diagonal sweep */
2699       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2700     }
2701     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2702     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2703   } else {
2704     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2705   }
2706   PetscFunctionReturn(0);
2707 }
2708 
2709