#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
#include "../src/mat/impls/sbaij/seq/sbaij.h"
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
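
/*
   Usage sketch (illustrative only, not part of this file): the store/retrieve
   pair above lets a caller snapshot the numerical values of an assembled
   MATMPISBAIJ matrix and restore them later, e.g. when re-evaluating a
   Jacobian whose nonzero pattern does not change. The usual pattern from the
   PETSc man pages is roughly:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);     // snapshot values of the A and B blocks
     // ... modify the values, e.g. MatScale(mat,2.0) ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);  // restore the snapshot

   Both calls simply forward to the sequential diagonal (A) and off-diagonal
   (B) blocks, as the two functions above show.
*/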

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
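
/*
   A minimal standalone sketch (illustrative only, with hypothetical names) of
   the search strategy both macros above share: bisect the sorted list of
   block columns in a row until at most three candidates remain, then finish
   with a short linear scan.

     static PetscInt FindBlockCol(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
     {
       PetscInt low = 0,high = nrow,t,i;
       while (high-low > 3) {            // bisection phase
         t = (low+high)/2;
         if (rp[t] > bcol) high = t;
         else              low  = t;
       }
       for (i=low; i<high; i++) {        // linear scan over the remainder
         if (rp[i] == bcol) return i;    // block already present
         if (rp[i] > bcol)  break;       // i is the insertion point
       }
       return -(i+1);                    // encode "not found, insert at i"
     }

   When the block is absent (and a->nonew permits insertion), the macros shift
   the trailing rp[] entries and their bs2-sized value blocks in ap[] up by
   one slot and insert the new block column at the scan position.
*/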

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macros */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  /* for stash */
  PetscInt       n_loc, *in_loc = PETSC_NULL;
  MatScalar      *v_loc = PETSC_NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash){
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor's entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs){
          if (a->ignore_ltriangular){
            continue;    /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diagonal entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diagonal entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off-processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
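
/*
   Caller-side sketch (hypothetical indices; not part of this file): only
   entries whose block row is <= their block column are accepted by the
   routine above.

     PetscInt    row = 3, cols[2] = {3, 7};
     PetscScalar vals[2] = {4.0, -1.0};
     // (3,3) and (3,7) lie on/above the diagonal, so both are kept
     ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     // a block-wise lower triangular entry (i>j) raises an error, unless
     // MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE) was called
     // first, in which case such entries are silently dropped
*/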

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If n == 1 (row-oriented) or m == 1 (column-oriented), no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
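
/*
   Blocked insertion sketch (illustrative; im[]/in[] are BLOCK indices and v
   holds dense bs x bs blocks, row-oriented unless MAT_ROW_ORIENTED is unset):

     PetscInt    brow = 0, bcol = 2;           // requires brow <= bcol
     PetscScalar block[4] = {1.0,2.0,2.0,5.0}; // one 2x2 block, bs = 2
     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,ADD_VALUES);CHKERRQ(ierr);
*/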

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
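
/*
   Why the factor of 2 in the Frobenius branch above: each process stores only
   the upper triangle, with the diagonal block in baij->A and the strictly
   upper off-diagonal blocks in baij->B, so for the full symmetric matrix M

     ||M||_F^2 = sum_p ( ||A_p||_F^2 + 2*||B_p||_F^2 )

   because every entry of B also appears, transposed, below the diagonal.
   The SeqSBAIJ Frobenius norm of A is assumed to already account for the
   entries mirrored inside that diagonal block.
*/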

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT mode or in ADD mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted while others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily set the roworiented flags to PETSC_FALSE, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* set up Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}
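
/*
   Illustrative caller-side sequence (not part of this file): values set with
   MatSetValues()/MatSetValuesBlocked() that belong to other processes are
   accumulated in mat->stash and mat->bstash, shipped in MatAssemblyBegin
   above, and drained into the local A/B blocks in MatAssemblyEnd.

     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);  // any rank, any (i,j) with i<=j
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     // mat is now ready for MatMult(), MatNorm(), etc.
*/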

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
  if (baij->bb1)  {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
  if (baij->xx1)  {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
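
/*
   Sketch of the algorithm above (informal): with D the local diagonal block
   stored in a->A and U the strictly upper off-diagonal block stored in a->B,
   the symmetric product is assembled as

     y_local = D*x_local + U*x_ghost + (contributions of U^T*x_local owed to other ranks)

   slvec0 packs [x_local ; U^T x_local], and the single forward scatter
   sMvctx with ADD_VALUES both ships the U^T pieces to their owning ranks
   (added into slvec1a on top of D*x_local) and gathers the ghost values
   x_ghost into slvec1b, so one communication round replaces the two rounds
   of the _2comm variant below.
*/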

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
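
/*
   Caller-side sketch: rows of an SBAIJ matrix may only be examined after
   MatGetRowUpperTriangular() (which sets the flag handled just below), and
   only locally owned global rows may be queried.

     PetscInt          nz;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,grow,&nz,&cols,&vals);CHKERRQ(ierr);
     // ... use the nz column indices/values of global row grow ...
     ierr = MatRestoreRow(mat,grow,&nz,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/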

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
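
/*
   Example (sketch): for a complex Hermitian operator, setting MAT_HERMITIAN
   after assembly swaps in MatMult_MPISBAIJ_Hermitian defined earlier in this
   file:

     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
*/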

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr){
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %d %d must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonal-scale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonal-scale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1355 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1356 {
1357   PetscErrorCode ierr;
1358 
1359   PetscFunctionBegin;
1360   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,PETSC_NULL,PETSC_DEFAULT,PETSC_NULL);CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1366 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1367 {
1368   PetscErrorCode ierr;
1369   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1370   PetscBLASInt   bnz,one=1;
1371   Mat_SeqSBAIJ   *xa,*ya;
1372   Mat_SeqBAIJ    *xb,*yb;
1373 
1374   PetscFunctionBegin;
1375   if (str == SAME_NONZERO_PATTERN) {
1376     PetscScalar alpha = a;
1377     xa = (Mat_SeqSBAIJ *)xx->A->data;
1378     ya = (Mat_SeqSBAIJ *)yy->A->data;
1379     bnz = PetscBLASIntCast(xa->nz);
1380     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1381     xb = (Mat_SeqBAIJ *)xx->B->data;
1382     yb = (Mat_SeqBAIJ *)yy->B->data;
1383     bnz = PetscBLASIntCast(xb->nz);
1384     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1385   } else {
1386     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1387     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1388     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1389   }
1390   PetscFunctionReturn(0);
1391 }
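/*
   Usage sketch (illustrative): Y <- Y + a*X. When the caller can guarantee
   SAME_NONZERO_PATTERN the routine above reduces to two BLAS axpy calls on the
   stored value arrays; any other structure flag falls back to MatAXPY_Basic(),
   which needs full (upper-triangular) row access.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/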
1392 
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1395 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1396 {
1397   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1398   PetscInt        rbs,cbs;
1399   PetscErrorCode  ierr;
1400 
1401   PetscFunctionBegin;
1402   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1403   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1404   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1405   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1406   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D incompatible with row block size %D",bs,rbs);
1407   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D incompatible with column block size %D",bs,cbs);
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1413 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1414 {
1415   PetscErrorCode ierr;
1416   PetscInt       i;
1417   PetscTruth     flg;
1418 
1419   PetscFunctionBegin;
1420   for (i=0; i<n; i++) {
1421     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1422     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1423   }
1424   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
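/*
   Usage sketch (illustrative): only symmetric (principal) submatrices can be
   extracted from an SBAIJ matrix, so identical index sets must be passed for
   rows and columns. The stride IS below, with hypothetical nloc and first, is
   one way to satisfy that requirement.

     IS  is;
     Mat *submat;
     ierr = ISCreateStride(PETSC_COMM_SELF,nloc,first,1,&is);CHKERRQ(ierr);
     ierr = MatGetSubMatrices(A,1,&is,&is,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/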
1427 
1428 
1429 /* -------------------------------------------------------------------*/
1430 static struct _MatOps MatOps_Values = {
1431        MatSetValues_MPISBAIJ,
1432        MatGetRow_MPISBAIJ,
1433        MatRestoreRow_MPISBAIJ,
1434        MatMult_MPISBAIJ,
1435 /* 4*/ MatMultAdd_MPISBAIJ,
1436        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1437        MatMultAdd_MPISBAIJ,
1438        0,
1439        0,
1440        0,
1441 /*10*/ 0,
1442        0,
1443        0,
1444        MatSOR_MPISBAIJ,
1445        MatTranspose_MPISBAIJ,
1446 /*15*/ MatGetInfo_MPISBAIJ,
1447        MatEqual_MPISBAIJ,
1448        MatGetDiagonal_MPISBAIJ,
1449        MatDiagonalScale_MPISBAIJ,
1450        MatNorm_MPISBAIJ,
1451 /*20*/ MatAssemblyBegin_MPISBAIJ,
1452        MatAssemblyEnd_MPISBAIJ,
1453        MatSetOption_MPISBAIJ,
1454        MatZeroEntries_MPISBAIJ,
1455 /*24*/ 0,
1456        0,
1457        0,
1458        0,
1459        0,
1460 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1461        0,
1462        0,
1463        0,
1464        0,
1465 /*34*/ MatDuplicate_MPISBAIJ,
1466        0,
1467        0,
1468        0,
1469        0,
1470 /*39*/ MatAXPY_MPISBAIJ,
1471        MatGetSubMatrices_MPISBAIJ,
1472        MatIncreaseOverlap_MPISBAIJ,
1473        MatGetValues_MPISBAIJ,
1474        MatCopy_MPISBAIJ,
1475 /*44*/ 0,
1476        MatScale_MPISBAIJ,
1477        0,
1478        0,
1479        0,
1480 /*49*/ MatSetBlockSize_MPISBAIJ,
1481        0,
1482        0,
1483        0,
1484        0,
1485 /*54*/ 0,
1486        0,
1487        MatSetUnfactored_MPISBAIJ,
1488        0,
1489        MatSetValuesBlocked_MPISBAIJ,
1490 /*59*/ 0,
1491        0,
1492        0,
1493        0,
1494        0,
1495 /*64*/ 0,
1496        0,
1497        0,
1498        0,
1499        0,
1500 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1501        0,
1502        0,
1503        0,
1504        0,
1505 /*74*/ 0,
1506        0,
1507        0,
1508        0,
1509        0,
1510 /*79*/ 0,
1511        0,
1512        0,
1513        0,
1514        MatLoad_MPISBAIJ,
1515 /*84*/ 0,
1516        0,
1517        0,
1518        0,
1519        0,
1520 /*89*/ 0,
1521        0,
1522        0,
1523        0,
1524        0,
1525 /*94*/ 0,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*99*/ 0,
1531        0,
1532        0,
1533        0,
1534        0,
1535 /*104*/0,
1536        MatRealPart_MPISBAIJ,
1537        MatImaginaryPart_MPISBAIJ,
1538        MatGetRowUpperTriangular_MPISBAIJ,
1539        MatRestoreRowUpperTriangular_MPISBAIJ,
1540 /*109*/0,
1541        0,
1542        0,
1543        0,
1544        0,
1545 /*114*/0,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*119*/0,
1551        0,
1552        0,
1553        0
1554 };
1555 
1556 
1557 EXTERN_C_BEGIN
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1560 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1561 {
1562   PetscFunctionBegin;
1563   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1564   *iscopy = PETSC_FALSE;
1565   PetscFunctionReturn(0);
1566 }
1567 EXTERN_C_END
1568 
1569 EXTERN_C_BEGIN
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1572 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1573 {
1574   Mat_MPISBAIJ   *b;
1575   PetscErrorCode ierr;
1576   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1577 
1578   PetscFunctionBegin;
1579   if (bs < 0){
1580     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1581       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1582     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1583     bs   = PetscAbs(bs);
1584   }
1585   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1586   bs = newbs;
1587 
1588   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1589   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1590   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1591   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1592 
1593   B->rmap->bs = B->cmap->bs = bs;
1594   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1595   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1596 
1597   if (d_nnz) {
1598     for (i=0; i<B->rmap->n/bs; i++) {
1599       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1600     }
1601   }
1602   if (o_nnz) {
1603     for (i=0; i<B->rmap->n/bs; i++) {
1604       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1605     }
1606   }
1607 
1608   b   = (Mat_MPISBAIJ*)B->data;
1609   mbs = B->rmap->n/bs;
1610   Mbs = B->rmap->N/bs;
1611   if (mbs*bs != B->rmap->n) {
1612     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1613   }
1614 
1615   B->rmap->bs  = bs;
1616   b->bs2 = bs*bs;
1617   b->mbs = mbs;
1618   b->nbs = mbs;
1619   b->Mbs = Mbs;
1620   b->Nbs = Mbs;
1621 
1622   for (i=0; i<=b->size; i++) {
1623     b->rangebs[i] = B->rmap->range[i]/bs;
1624   }
1625   b->rstartbs = B->rmap->rstart/bs;
1626   b->rendbs   = B->rmap->rend/bs;
1627 
1628   b->cstartbs = B->cmap->rstart/bs;
1629   b->cendbs   = B->cmap->rend/bs;
1630 
1631   if (!B->preallocated) {
1632     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1633     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1634     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1635     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1636     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1637     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1638     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1639     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1640     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1641   }
1642 
1643   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1644   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1645   B->preallocated = PETSC_TRUE;
1646   PetscFunctionReturn(0);
1647 }
1648 EXTERN_C_END
1649 
1650 EXTERN_C_BEGIN
1651 #if defined(PETSC_HAVE_MUMPS)
1652 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1653 #endif
1654 #if defined(PETSC_HAVE_SPOOLES)
1655 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1656 #endif
1657 #if defined(PETSC_HAVE_PASTIX)
1658 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1659 #endif
1660 EXTERN_C_END
1661 
1662 /*MC
1663    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1664    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1665 
1666    Options Database Keys:
1667 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1668 
1669   Level: beginner
1670 
1671 .seealso: MatCreateMPISBAIJ
1672 M*/
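/*
   Usage sketch (illustrative): selecting this type from the options database;
   the sizes m,n,M,N are hypothetical.

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   then run the program with -mat_type mpisbaij.
*/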
1673 
1674 EXTERN_C_BEGIN
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatCreate_MPISBAIJ"
1677 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1678 {
1679   Mat_MPISBAIJ   *b;
1680   PetscErrorCode ierr;
1681   PetscTruth     flg;
1682 
1683   PetscFunctionBegin;
1684 
1685   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1686   B->data = (void*)b;
1687   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1688 
1689   B->ops->destroy    = MatDestroy_MPISBAIJ;
1690   B->ops->view       = MatView_MPISBAIJ;
1691   B->mapping         = 0;
1692   B->assembled       = PETSC_FALSE;
1693 
1694   B->insertmode = NOT_SET_VALUES;
1695   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1696   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1697 
1698   /* build local table of row and column ownerships */
1699   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1700 
1701   /* build cache for off array entries formed */
1702   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1703   b->donotstash  = PETSC_FALSE;
1704   b->colmap      = PETSC_NULL;
1705   b->garray      = PETSC_NULL;
1706   b->roworiented = PETSC_TRUE;
1707 
1708   /* stuff used in block assembly */
1709   b->barray       = 0;
1710 
1711   /* stuff used for matrix vector multiply */
1712   b->lvec         = 0;
1713   b->Mvctx        = 0;
1714   b->slvec0       = 0;
1715   b->slvec0b      = 0;
1716   b->slvec1       = 0;
1717   b->slvec1a      = 0;
1718   b->slvec1b      = 0;
1719   b->sMvctx       = 0;
1720 
1721   /* stuff for MatGetRow() */
1722   b->rowindices   = 0;
1723   b->rowvalues    = 0;
1724   b->getrowactive = PETSC_FALSE;
1725 
1726   /* hash table stuff */
1727   b->ht           = 0;
1728   b->hd           = 0;
1729   b->ht_size      = 0;
1730   b->ht_flag      = PETSC_FALSE;
1731   b->ht_fact      = 0;
1732   b->ht_total_ct  = 0;
1733   b->ht_insert_ct = 0;
1734 
1735   b->in_loc       = 0;
1736   b->v_loc        = 0;
1737   b->n_loc        = 0;
1738   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1739     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1740     if (flg) {
1741       PetscReal fact = 1.39;
1742       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1743       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1744       if (fact <= 1.0) fact = 1.39;
1745       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1746       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1747     }
1748   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1749 
1750 #if defined(PETSC_HAVE_PASTIX)
1751   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1752 					   "MatGetFactor_mpisbaij_pastix",
1753 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1754 #endif
1755 #if defined(PETSC_HAVE_MUMPS)
1756   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1757                                      "MatGetFactor_sbaij_mumps",
1758                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1759 #endif
1760 #if defined(PETSC_HAVE_SPOOLES)
1761   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1762                                      "MatGetFactor_mpisbaij_spooles",
1763                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1764 #endif
1765   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1766                                      "MatStoreValues_MPISBAIJ",
1767                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1768   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1769                                      "MatRetrieveValues_MPISBAIJ",
1770                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1772                                      "MatGetDiagonalBlock_MPISBAIJ",
1773                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1774   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1775                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1776                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1777   B->symmetric                  = PETSC_TRUE;
1778   B->structurally_symmetric     = PETSC_TRUE;
1779   B->symmetric_set              = PETSC_TRUE;
1780   B->structurally_symmetric_set = PETSC_TRUE;
1781   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1782   PetscFunctionReturn(0);
1783 }
1784 EXTERN_C_END
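/*
   Usage note (illustrative): the hash-table assembly path initialized above is
   activated from the command line, e.g.

     ./app -mat_type mpisbaij -mat_use_hash_table 1.5

   where the optional value is the table-size factor; values <= 1.0 are replaced
   by the default 1.39. The program name is hypothetical.
*/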
1785 
1786 /*MC
1787    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1788 
1789    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1790    and MATMPISBAIJ otherwise.
1791 
1792    Options Database Keys:
1793 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1794 
1795   Level: beginner
1796 
1797 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1798 M*/
1799 
1800 EXTERN_C_BEGIN
1801 #undef __FUNCT__
1802 #define __FUNCT__ "MatCreate_SBAIJ"
1803 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1804 {
1805   PetscErrorCode ierr;
1806   PetscMPIInt    size;
1807 
1808   PetscFunctionBegin;
1809   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1810   if (size == 1) {
1811     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1812   } else {
1813     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1814   }
1815   PetscFunctionReturn(0);
1816 }
1817 EXTERN_C_END
1818 
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1821 /*@C
1822    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1823    the user should preallocate the matrix storage by setting the parameters
1824    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1825    performance can be increased by more than a factor of 50.
1826 
1827    Collective on Mat
1828 
1829    Input Parameters:
1830 +  A - the matrix
1831 .  bs   - size of block
1832 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1833            submatrix  (same for all local rows)
1834 .  d_nnz - array containing the number of block nonzeros in the various block rows
1835            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1836            (possibly different for each block row) or PETSC_NULL.  You must leave room
1837            for the diagonal entry even if it is zero.
1838 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1839            submatrix (same for all local rows).
1840 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1841            off-diagonal portion of the local submatrix (possibly different for
1842            each block row) or PETSC_NULL.
1843 
1844 
1845    Options Database Keys:
1846 +   -mat_no_unroll - uses code that does not unroll the loops in the
1847                      block calculations (much slower)
1848 -   -mat_block_size - size of the blocks to use
1849 
1850    Notes:
1851 
1852    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1853    then it must be used on all processors that share the object for that argument.
1854 
1855    If the *_nnz parameter is given then the *_nz parameter is ignored
1856 
1857    Storage Information:
1858    For a square global matrix we define each processor's diagonal portion
1859    to be its local rows and the corresponding columns (a square submatrix);
1860    each processor's off-diagonal portion encompasses the remainder of the
1861    local matrix (a rectangular submatrix).
1862 
1863    The user can specify preallocated storage for the diagonal part of
1864    the local submatrix with either d_nz or d_nnz (not both).  Set
1865    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1866    memory allocation.  Likewise, specify preallocated storage for the
1867    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1868 
1869    You can call MatGetInfo() to get information on how effective the preallocation was;
1870    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1871    You can also run with the option -info and look for messages with the string
1872    malloc in them to see if additional memory allocation was needed.
1873 
1874    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1875    the figure below we depict these three local rows and all columns (0-11).
1876 
1877 .vb
1878            0 1 2 3 4 5 6 7 8 9 10 11
1879           -------------------
1880    row 3  |  o o o d d d o o o o o o
1881    row 4  |  o o o d d d o o o o o o
1882    row 5  |  o o o d d d o o o o o o
1883           -------------------
1884 .ve
1885 
1886    Thus, any entries in the d locations are stored in the d (diagonal)
1887    submatrix, and any entries in the o locations are stored in the
1888    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1889    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1890 
1891    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1892    plus the diagonal part of the d matrix,
1893    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1894    In general, for PDE problems in which most nonzeros are near the diagonal,
1895    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1896    or you will get TERRIBLE performance; see the users' manual chapter on
1897    matrices.
1898 
1899    Level: intermediate
1900 
1901 .keywords: matrix, block, aij, compressed row, sparse, parallel
1902 
1903 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1904 @*/
1905 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1906 {
1907   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1908 
1909   PetscFunctionBegin;
1910   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1911   if (f) {
1912     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1913   }
1914   PetscFunctionReturn(0);
1915 }
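/*
   Preallocation sketch (illustrative; all names hypothetical): for a
   block-tridiagonal symmetric matrix, each block row keeps its diagonal block
   plus at most one super-diagonal block in the upper triangle, so d_nnz = 2 and
   o_nnz = 1 are safe per-block-row estimates.

     PetscInt i,mbs = m/bs,*d_nnz,*o_nnz;
     ierr = PetscMalloc2(mbs,PetscInt,&d_nnz,mbs,PetscInt,&o_nnz);CHKERRQ(ierr);
     for (i=0; i<mbs; i++) { d_nnz[i] = 2; o_nnz[i] = 1; }
     ierr = MatMPISBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
*/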
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatCreateMPISBAIJ"
1919 /*@C
1920    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1921    (block compressed row).  For good matrix assembly performance
1922    the user should preallocate the matrix storage by setting the parameters
1923    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1924    performance can be increased by more than a factor of 50.
1925 
1926    Collective on MPI_Comm
1927 
1928    Input Parameters:
1929 +  comm - MPI communicator
1930 .  bs   - size of block
1931 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1932            This value should be the same as the local size used in creating the
1933            y vector for the matrix-vector product y = Ax.
1934 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1935            This value should be the same as the local size used in creating the
1936            x vector for the matrix-vector product y = Ax.
1937 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1938 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1939 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1940            submatrix  (same for all local rows)
1941 .  d_nnz - array containing the number of block nonzeros in the various block rows
1942            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1943            (possibly different for each block row) or PETSC_NULL.
1944            You must leave room for the diagonal entry even if it is zero.
1945 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1946            submatrix (same for all local rows).
1947 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1948            off-diagonal portion of the local submatrix (possibly different for
1949            each block row) or PETSC_NULL.
1950 
1951    Output Parameter:
1952 .  A - the matrix
1953 
1954    Options Database Keys:
1955 +   -mat_no_unroll - uses code that does not unroll the loops in the
1956                      block calculations (much slower)
1957 .   -mat_block_size - size of the blocks to use
1958 -   -mat_mpi - use the parallel matrix data structures even on one processor
1959                (defaults to using SeqBAIJ format on one processor)
1960 
1961    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1962    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1963    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1964 
1965    Notes:
1966    The number of rows and columns must be divisible by blocksize.
1967    This matrix type does not support complex Hermitian operations.
1968 
1969    The user MUST specify either the local or global matrix dimensions
1970    (possibly both).
1971 
1972    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1973    then it must be used on all processors that share the object for that argument.
1974 
1975    If the *_nnz parameter is given then the *_nz parameter is ignored
1976 
1977    Storage Information:
1978    For a square global matrix we define each processor's diagonal portion
1979    to be its local rows and the corresponding columns (a square submatrix);
1980    each processor's off-diagonal portion encompasses the remainder of the
1981    local matrix (a rectangular submatrix).
1982 
1983    The user can specify preallocated storage for the diagonal part of
1984    the local submatrix with either d_nz or d_nnz (not both).  Set
1985    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1986    memory allocation.  Likewise, specify preallocated storage for the
1987    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1988 
1989    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1990    the figure below we depict these three local rows and all columns (0-11).
1991 
1992 .vb
1993            0 1 2 3 4 5 6 7 8 9 10 11
1994           -------------------
1995    row 3  |  o o o d d d o o o o o o
1996    row 4  |  o o o d d d o o o o o o
1997    row 5  |  o o o d d d o o o o o o
1998           -------------------
1999 .ve
2000 
2001    Thus, any entries in the d locations are stored in the d (diagonal)
2002    submatrix, and any entries in the o locations are stored in the
2003    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2004    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2005 
2006    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2007    plus the diagonal part of the d matrix,
2008    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2009    In general, for PDE problems in which most nonzeros are near the diagonal,
2010    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2011    or you will get TERRIBLE performance; see the users' manual chapter on
2012    matrices.
2013 
2014    Level: intermediate
2015 
2016 .keywords: matrix, block, aij, compressed row, sparse, parallel
2017 
2018 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2019 @*/
2020 
2021 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2022 {
2023   PetscErrorCode ierr;
2024   PetscMPIInt    size;
2025 
2026   PetscFunctionBegin;
2027   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2028   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2029   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2030   if (size > 1) {
2031     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2032     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2033   } else {
2034     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2035     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2036   }
2037   PetscFunctionReturn(0);
2038 }
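/*
   Creation sketch (illustrative; n, row, col, and val are hypothetical):
   assemble a small matrix of this type. Only entries with row <= col need to
   be set, since the lower triangle is implied by symmetry.

     Mat A;
     ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,1,PETSC_DECIDE,PETSC_DECIDE,n,n,
                              2,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/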
2039 
2040 
2041 #undef __FUNCT__
2042 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2043 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2044 {
2045   Mat            mat;
2046   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2047   PetscErrorCode ierr;
2048   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2049   PetscScalar    *array;
2050 
2051   PetscFunctionBegin;
2052   *newmat       = 0;
2053   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2054   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2055   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2056   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2057   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2058   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2059 
2060   mat->factortype   = matin->factortype;
2061   mat->preallocated = PETSC_TRUE;
2062   mat->assembled    = PETSC_TRUE;
2063   mat->insertmode   = NOT_SET_VALUES;
2064 
2065   a = (Mat_MPISBAIJ*)mat->data;
2066   a->bs2   = oldmat->bs2;
2067   a->mbs   = oldmat->mbs;
2068   a->nbs   = oldmat->nbs;
2069   a->Mbs   = oldmat->Mbs;
2070   a->Nbs   = oldmat->Nbs;
2071 
2072 
2073   a->size         = oldmat->size;
2074   a->rank         = oldmat->rank;
2075   a->donotstash   = oldmat->donotstash;
2076   a->roworiented  = oldmat->roworiented;
2077   a->rowindices   = 0;
2078   a->rowvalues    = 0;
2079   a->getrowactive = PETSC_FALSE;
2080   a->barray       = 0;
2081   a->rstartbs    = oldmat->rstartbs;
2082   a->rendbs      = oldmat->rendbs;
2083   a->cstartbs    = oldmat->cstartbs;
2084   a->cendbs      = oldmat->cendbs;
2085 
2086   /* hash table stuff */
2087   a->ht           = 0;
2088   a->hd           = 0;
2089   a->ht_size      = 0;
2090   a->ht_flag      = oldmat->ht_flag;
2091   a->ht_fact      = oldmat->ht_fact;
2092   a->ht_total_ct  = 0;
2093   a->ht_insert_ct = 0;
2094 
2095   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2096   if (oldmat->colmap) {
2097 #if defined (PETSC_USE_CTABLE)
2098     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2099 #else
2100     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2101     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2102     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2103 #endif
2104   } else a->colmap = 0;
2105 
2106   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2107     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2108     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2109     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2110   } else a->garray = 0;
2111 
2112   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2113   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2114   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2115   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2116   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2117 
2118   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2119   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2120   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2121   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2122 
2123   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2124   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2125   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2126   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2127   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2128   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2129   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2130   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2131   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2132   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2133   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2134   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2135   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2136 
2137   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2138   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2139   a->sMvctx = oldmat->sMvctx;
2140   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2141 
2142   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2143   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2144   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2145   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2146   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2147   *newmat = mat;
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatLoad_MPISBAIJ"
2153 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, Mat newmat)
2154 {
2155   PetscErrorCode ierr;
2156   PetscInt       i,nz,j,rstart,rend;
2157   PetscScalar    *vals,*buf;
2158   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2159   MPI_Status     status;
2160   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2161   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2162   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2163   PetscInt       bs=1,Mbs,mbs,extra_rows;
2164   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2165   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2166   int            fd;
2167 
2168   PetscFunctionBegin;
2169   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2170     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2171   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2172 
2173   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2174   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2175   if (!rank) {
2176     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2177     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2178     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2179     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2180   }
2181 
2182   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2183 
2184   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2185   M = header[1]; N = header[2];
2186 
2187   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2188   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2189   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2190 
2191   /* If global sizes are set, check if they are consistent with that given in the file */
2192   if (sizesset) {
2193     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2194   }
2195   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2196   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2197 
2198   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2199 
2200   /*
2201      This code adds extra rows to make sure the number of rows is
2202      divisible by the blocksize
2203   */
2204   Mbs        = M/bs;
2205   extra_rows = bs - M + bs*(Mbs);
2206   if (extra_rows == bs) extra_rows = 0;
2207   else                  Mbs++;
2208   if (extra_rows && !rank) {
2209     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2210   }
2211 
2212   /* determine ownership of all rows */
2213   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2214     mbs        = Mbs/size + ((Mbs % size) > rank);
2215     m          = mbs*bs;
2216   } else { /* User Set */
2217     m          = newmat->rmap->n;
2218     mbs        = m/bs;
2219   }
2220   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2221   mmbs       = PetscMPIIntCast(mbs);
2222   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2223   rowners[0] = 0;
2224   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2225   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2226   rstart = rowners[rank];
2227   rend   = rowners[rank+1];
2228 
2229   /* distribute row lengths to all processors */
2230   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2231   if (!rank) {
2232     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2233     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2234     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2235     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2236     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2237     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2238     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2239   } else {
2240     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2241   }
2242 
2243   if (!rank) {   /* procs[0] */
2244     /* calculate the number of nonzeros on each processor */
2245     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2246     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2247     for (i=0; i<size; i++) {
2248       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2249         procsnz[i] += rowlengths[j];
2250       }
2251     }
2252     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2253 
2254     /* determine max buffer needed and allocate it */
2255     maxnz = 0;
2256     for (i=0; i<size; i++) {
2257       maxnz = PetscMax(maxnz,procsnz[i]);
2258     }
2259     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2260 
2261     /* read in my part of the matrix column indices  */
2262     nz     = procsnz[0];
2263     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2264     mycols = ibuf;
2265     if (size == 1)  nz -= extra_rows;
2266     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2267     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2268 
2269     /* read in the parts for the other processes (except the last one) and ship them off */
2270     for (i=1; i<size-1; i++) {
2271       nz   = procsnz[i];
2272       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2273       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2274     }
2275     /* read in the stuff for the last proc */
2276     if (size != 1) {
2277       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2278       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2279       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2280       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2281     }
2282     ierr = PetscFree(cols);CHKERRQ(ierr);
2283   } else {  /* procs[i], i>0 */
2284     /* determine buffer space needed for message */
2285     nz = 0;
2286     for (i=0; i<m; i++) {
2287       nz += locrowlens[i];
2288     }
2289     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2290     mycols = ibuf;
2291     /* receive message of column indices*/
2292     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2293     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2294     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2295   }
2296 
2297   /* loop over local rows, determining number of off diagonal entries */
2298   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2299   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2300   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2301   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2302   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2303   rowcount = 0;
2304   nzcount  = 0;
2305   for (i=0; i<mbs; i++) {
2306     dcount  = 0;
2307     odcount = 0;
2308     for (j=0; j<bs; j++) {
2309       kmax = locrowlens[rowcount];
2310       for (k=0; k<kmax; k++) {
2311         tmp = mycols[nzcount++]/bs; /* block col. index */
2312         if (!mask[tmp]) {
2313           mask[tmp] = 1;
2314           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2315           else masked1[dcount++] = tmp; /* entry in diag portion */
2316         }
2317       }
2318       rowcount++;
2319     }
2320 
2321     dlens[i]  = dcount;  /* d_nzz[i] */
2322     odlens[i] = odcount; /* o_nzz[i] */
2323 
2324     /* zero out the mask elements we set */
2325     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2326     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2327   }
2328   if (!sizesset) {
2329     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2330   }
2331   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2332   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2333 
2334   if (!rank) {
2335     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2336     /* read in my part of the matrix numerical values  */
2337     nz = procsnz[0];
2338     vals = buf;
2339     mycols = ibuf;
2340     if (size == 1)  nz -= extra_rows;
2341     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2342     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2343 
2344     /* insert into matrix */
2345     jj      = rstart*bs;
2346     for (i=0; i<m; i++) {
2347       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2348       mycols += locrowlens[i];
2349       vals   += locrowlens[i];
2350       jj++;
2351     }
2352 
2353     /* read in other processors (except the last one) and ship out */
2354     for (i=1; i<size-1; i++) {
2355       nz   = procsnz[i];
2356       vals = buf;
2357       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2358       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2359     }
2360     /* the last proc */
2361     if (size != 1){
2362       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2363       vals = buf;
2364       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2365       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2366       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2367     }
2368     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2369 
2370   } else {
2371     /* receive numeric values */
2372     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2373 
2374     /* receive message of values*/
2375     vals   = buf;
2376     mycols = ibuf;
2377     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2378     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2379     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2380 
2381     /* insert into matrix */
2382     jj      = rstart*bs;
2383     for (i=0; i<m; i++) {
2384       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2385       mycols += locrowlens[i];
2386       vals   += locrowlens[i];
2387       jj++;
2388     }
2389   }
2390 
2391   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2392   ierr = PetscFree(buf);CHKERRQ(ierr);
2393   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2394   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2395   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2396   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2397   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2398   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2399   PetscFunctionReturn(0);
2400 }
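/*
   Load sketch (illustrative): reading a PETSc binary file into this type. The
   public MatLoad() calling sequence differs across PETSc versions, so treat
   the lines below as an assumption to be checked against the matching
   petscmat.h; the file name A.dat is hypothetical.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/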
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2404 /*XXXXX@
2405    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2406 
2407    Input Parameters:
2408 .  mat  - the matrix
2409 .  fact - factor
2410 
2411    Collective on Mat
2412 
2413    Level: advanced
2414 
2415   Notes:
2416    This can also be set by the command line option: -mat_use_hash_table fact
2417 
2418 .keywords: matrix, hashtable, factor, HT
2419 
2420 .seealso: MatSetOption()
2421 @XXXXX*/
2422 
2423 
2424 #undef __FUNCT__
2425 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2426 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2427 {
2428   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2429   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2430   PetscReal      atmp;
2431   PetscReal      *work,*svalues,*rvalues;
2432   PetscErrorCode ierr;
2433   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2434   PetscMPIInt    rank,size;
2435   PetscInt       *rowners_bs,dest,count,source;
2436   PetscScalar    *va;
2437   MatScalar      *ba;
2438   MPI_Status     stat;
2439 
2440   PetscFunctionBegin;
2441   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2442   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2443   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2444 
2445   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2446   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2447 
2448   bs   = A->rmap->bs;
2449   mbs  = a->mbs;
2450   Mbs  = a->Mbs;
2451   ba   = b->a;
2452   bi   = b->i;
2453   bj   = b->j;
2454 
2455   /* find ownerships */
2456   rowners_bs = A->rmap->range;
2457 
2458   /* each proc creates an array to be distributed */
2459   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2460   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2461 
2462   /* row_max for B */
2463   if (rank != size-1){
2464     for (i=0; i<mbs; i++) {
2465       ncols = bi[1] - bi[0]; bi++;
2466       brow  = bs*i;
2467       for (j=0; j<ncols; j++){
2468         bcol = bs*(*bj);
2469         for (kcol=0; kcol<bs; kcol++){
2470           col = bcol + kcol;                 /* local col index */
2471           col += rowners_bs[rank+1];      /* global col index */
2472           for (krow=0; krow<bs; krow++){
2473             atmp = PetscAbsScalar(*ba); ba++;
2474             row = brow + krow;    /* local row index */
2475             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2476             if (work[col] < atmp) work[col] = atmp;
2477           }
2478         }
2479         bj++;
2480       }
2481     }
2482 
2483     /* send values to its owners */
2484     for (dest=rank+1; dest<size; dest++){
2485       svalues = work + rowners_bs[dest];
2486       count   = rowners_bs[dest+1]-rowners_bs[dest];
2487       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2488     }
2489   }
2490 
2491   /* receive values */
2492   if (rank){
2493     rvalues = work;
2494     count   = rowners_bs[rank+1]-rowners_bs[rank];
2495     for (source=0; source<rank; source++){
2496       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2497       /* process values */
2498       for (i=0; i<count; i++){
2499         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2500       }
2501     }
2502   }
2503 
2504   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2505   ierr = PetscFree(work);CHKERRQ(ierr);
2506   PetscFunctionReturn(0);
2507 }
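/*
   Usage sketch (illustrative): v[i] receives the largest absolute value in
   global row i. The idx argument is not supported for this format and must be
   PETSC_NULL; the vector name v is hypothetical.

     Vec v;
     ierr = MatGetVecs(A,&v,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,PETSC_NULL);CHKERRQ(ierr);
*/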
2508 
2509 #undef __FUNCT__
2510 #define __FUNCT__ "MatSOR_MPISBAIJ"
2511 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2512 {
2513   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2514   PetscErrorCode    ierr;
2515   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2516   PetscScalar       *x,*ptr,*from;
2517   Vec               bb1;
2518   const PetscScalar *b;
2519 
2520   PetscFunctionBegin;
2521   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2522   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2523 
2524   if (flag == SOR_APPLY_UPPER) {
2525     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2526     PetscFunctionReturn(0);
2527   }
2528 
2529   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2530     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2531       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2532       its--;
2533     }
2534 
2535     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2536     while (its--){
2537 
2538       /* lower triangular part: slvec0b = - B^T*xx */
2539       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2540 
2541       /* copy xx into slvec0a */
2542       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2543       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2544       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2545       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2546 
2547       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2548 
2549       /* copy bb into slvec1a */
2550       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2551       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2552       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2553       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2554 
2555       /* set slvec1b = 0 */
2556       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2557 
2558       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2559       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2560       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2561       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2562 
2563       /* upper triangular part: bb1 = bb1 - B*x */
2564       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2565 
2566       /* local diagonal sweep */
2567       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2568     }
2569     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2570   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2571     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2572   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2573     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2574   } else if (flag & SOR_EISENSTAT) {
2575     Vec               xx1;
2576     PetscTruth        hasop;
2577     const PetscScalar *diag;
2578     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2579     PetscInt          i,n;
2580 
2581     if (!mat->xx1) {
2582       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2583       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2584     }
2585     xx1 = mat->xx1;
2586     bb1 = mat->bb1;
2587 
2588     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2589 
2590     if (!mat->diag) {
2591       /* this is wrong for same matrix with new nonzero values */
2592       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2593       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2594     }
2595     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2596 
2597     if (hasop) {
2598       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2599       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2600     } else {
2601       /*
2602           These two lines are replaced by code that may be a bit faster for a good compiler
2603       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2604       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2605       */
2606       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2607       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2608       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2609       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2610       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2611       if (omega == 1.0) {
2612 	for (i=0; i<n; i++) {
2613 	  sl[i] = b[i] - diag[i]*x[i];
2614 	}
2615         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2616       } else {
2617 	for (i=0; i<n; i++) {
2618 	  sl[i] = b[i] + scale*diag[i]*x[i];
2619 	}
2620         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2621       }
2622       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2623       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2624       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2625       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2626     }
2627 
2628     /* multiply off-diagonal portion of matrix */
2629     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2630     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2631     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2632     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2633     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2634     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2635     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2636     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2637     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2638     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2639 
2640     /* local sweep */
2641     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2642     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2643   } else {
2644     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2645   }
2646   PetscFunctionReturn(0);
2647 }
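/*
   Usage sketch (illustrative): this kernel is normally reached through the SOR
   preconditioner. For bs = 1 the supported variants are the local symmetric
   sweep (used below), single local forward/backward sweeps with a zero initial
   guess, and the Eisenstat trick; ksp and pc are hypothetical.

     PC pc;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
*/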
2648 
2649 #undef __FUNCT__
2650 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2651 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2652 {
2653   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2654   PetscErrorCode ierr;
2655   Vec            lvec1,bb1;
2656 
2657   PetscFunctionBegin;
2658   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2659   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2660 
2661   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2662     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2663       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2664       its--;
2665     }
2666 
2667     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2668     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2669     while (its--){
2670       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2671 
2672       /* lower diagonal part: bb1 = bb - B^T*xx */
2673       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2674       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2675 
2676       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2677       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2678       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2679 
2680       /* upper diagonal part: bb1 = bb1 - B*x */
2681       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2682       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2683 
2684       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2685 
2686       /* diagonal sweep */
2687       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2688     }
2689     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2690     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2691   } else {
2692     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2693   }
2694   PetscFunctionReturn(0);
2695 }
2696 
2697