xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision cdc7d174e19834f285ac5eea29ad817d7b62eeac)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "../src/mat/impls/sbaij/seq/sbaij.h"
6 #include "petscblaslapack.h"
7 
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
11 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
12 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
14 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
17 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
22 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 EXTERN_C_BEGIN
25 #undef __FUNCT__
26 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
27 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
28 {
29   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
30   PetscErrorCode ierr;
31 
32   PetscFunctionBegin;
33   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
34   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
35   PetscFunctionReturn(0);
36 }
37 EXTERN_C_END
38 
39 EXTERN_C_BEGIN
40 #undef __FUNCT__
41 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
42 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
43 {
44   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
45   PetscErrorCode ierr;
46 
47   PetscFunctionBegin;
48   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
49   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 EXTERN_C_END
53 
54 
55 #define CHUNKSIZE  10
56 
57 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
58 { \
59  \
60     brow = row/bs;  \
61     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
62     rmax = aimax[brow]; nrow = ailen[brow]; \
63       bcol = col/bs; \
64       ridx = row % bs; cidx = col % bs; \
65       low = 0; high = nrow; \
66       while (high-low > 3) { \
67         t = (low+high)/2; \
68         if (rp[t] > bcol) high = t; \
69         else              low  = t; \
70       } \
71       for (_i=low; _i<high; _i++) { \
72         if (rp[_i] > bcol) break; \
73         if (rp[_i] == bcol) { \
74           bap  = ap +  bs2*_i + bs*cidx + ridx; \
75           if (addv == ADD_VALUES) *bap += value;  \
76           else                    *bap  = value;  \
77           goto a_noinsert; \
78         } \
79       } \
80       if (a->nonew == 1) goto a_noinsert; \
81       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
82       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
83       N = nrow++ - 1;  \
84       /* shift up all the later entries in this row */ \
85       for (ii=N; ii>=_i; ii--) { \
86         rp[ii+1] = rp[ii]; \
87         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
88       } \
89       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
90       rp[_i]                      = bcol;  \
91       ap[bs2*_i + bs*cidx + ridx] = value;  \
92       a_noinsert:; \
93     ailen[brow] = nrow; \
94 }
95 
96 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
97 { \
98     brow = row/bs;  \
99     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
100     rmax = bimax[brow]; nrow = bilen[brow]; \
101       bcol = col/bs; \
102       ridx = row % bs; cidx = col % bs; \
103       low = 0; high = nrow; \
104       while (high-low > 3) { \
105         t = (low+high)/2; \
106         if (rp[t] > bcol) high = t; \
107         else              low  = t; \
108       } \
109       for (_i=low; _i<high; _i++) { \
110         if (rp[_i] > bcol) break; \
111         if (rp[_i] == bcol) { \
112           bap  = ap +  bs2*_i + bs*cidx + ridx; \
113           if (addv == ADD_VALUES) *bap += value;  \
114           else                    *bap  = value;  \
115           goto b_noinsert; \
116         } \
117       } \
118       if (b->nonew == 1) goto b_noinsert; \
119       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
120       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
121       N = nrow++ - 1;  \
122       /* shift up all the later entries in this row */ \
123       for (ii=N; ii>=_i; ii--) { \
124         rp[ii+1] = rp[ii]; \
125         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
126       } \
127       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
128       rp[_i]                      = bcol;  \
129       ap[bs2*_i + bs*cidx + ridx] = value;  \
130       b_noinsert:; \
131     bilen[brow] = nrow; \
132 }
133 
134 /* Only add/insert a(i,j) with i<=j (blocks).
135    Any a(i,j) with i>j input by user is ingored.
136 */
137 #undef __FUNCT__
138 #define __FUNCT__ "MatSetValues_MPISBAIJ"
139 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
140 {
141   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
142   MatScalar      value;
143   PetscTruth     roworiented = baij->roworiented;
144   PetscErrorCode ierr;
145   PetscInt       i,j,row,col;
146   PetscInt       rstart_orig=mat->rmap->rstart;
147   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
148   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
149 
150   /* Some Variables required in the macro */
151   Mat            A = baij->A;
152   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
153   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
154   MatScalar      *aa=a->a;
155 
156   Mat            B = baij->B;
157   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
158   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
159   MatScalar     *ba=b->a;
160 
161   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
162   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
163   MatScalar     *ap,*bap;
164 
165   /* for stash */
166   PetscInt      n_loc, *in_loc = PETSC_NULL;
167   MatScalar     *v_loc = PETSC_NULL;
168 
169   PetscFunctionBegin;
170   if (v) PetscValidScalarPointer(v,6);
171   if (!baij->donotstash){
172     if (n > baij->n_loc) {
173       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
174       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
175       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
176       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
177       baij->n_loc = n;
178     }
179     in_loc = baij->in_loc;
180     v_loc  = baij->v_loc;
181   }
182 
183   for (i=0; i<m; i++) {
184     if (im[i] < 0) continue;
185 #if defined(PETSC_USE_DEBUG)
186     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
187 #endif
188     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
189       row = im[i] - rstart_orig;              /* local row index */
190       for (j=0; j<n; j++) {
191         if (im[i]/bs > in[j]/bs){
192           if (a->ignore_ltriangular){
193             continue;    /* ignore lower triangular blocks */
194           } else {
195             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
196           }
197         }
198         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
199           col = in[j] - cstart_orig;          /* local col index */
200           brow = row/bs; bcol = col/bs;
201           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
202           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
203           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
204           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
205         } else if (in[j] < 0) continue;
206 #if defined(PETSC_USE_DEBUG)
207         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
208 #endif
209         else {  /* off-diag entry (B) */
210           if (mat->was_assembled) {
211             if (!baij->colmap) {
212               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
213             }
214 #if defined (PETSC_USE_CTABLE)
215             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
216             col  = col - 1;
217 #else
218             col = baij->colmap[in[j]/bs] - 1;
219 #endif
220             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
221               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
222               col =  in[j];
223               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
224               B = baij->B;
225               b = (Mat_SeqBAIJ*)(B)->data;
226               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
227               ba=b->a;
228             } else col += in[j]%bs;
229           } else col = in[j];
230           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
231           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
232           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
233         }
234       }
235     } else {  /* off processor entry */
236       if (!baij->donotstash) {
237         n_loc = 0;
238         for (j=0; j<n; j++){
239           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
240           in_loc[n_loc] = in[j];
241           if (roworiented) {
242             v_loc[n_loc] = v[i*n+j];
243           } else {
244             v_loc[n_loc] = v[j*m+i];
245           }
246           n_loc++;
247         }
248         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
249       }
250     }
251   }
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
257 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
258 {
259   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
260   const MatScalar *value;
261   MatScalar       *barray=baij->barray;
262   PetscTruth      roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
263   PetscErrorCode  ierr;
264   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
265   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
266   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
267 
268   PetscFunctionBegin;
269   if(!barray) {
270     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
271     baij->barray = barray;
272   }
273 
274   if (roworiented) {
275     stepval = (n-1)*bs;
276   } else {
277     stepval = (m-1)*bs;
278   }
279   for (i=0; i<m; i++) {
280     if (im[i] < 0) continue;
281 #if defined(PETSC_USE_DEBUG)
282     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
283 #endif
284     if (im[i] >= rstart && im[i] < rend) {
285       row = im[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (im[i] > in[j]) {
288           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
289           else SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
290         }
291         /* If NumCol = 1 then a copy is not required */
292         if ((roworiented) && (n == 1)) {
293           barray = (MatScalar*) v + i*bs2;
294         } else if((!roworiented) && (m == 1)) {
295           barray = (MatScalar*) v + j*bs2;
296         } else { /* Here a copy is required */
297           if (roworiented) {
298             value = v + i*(stepval+bs)*bs + j*bs;
299           } else {
300             value = v + j*(stepval+bs)*bs + i*bs;
301           }
302           for (ii=0; ii<bs; ii++,value+=stepval) {
303             for (jj=0; jj<bs; jj++) {
304               *barray++  = *value++;
305             }
306           }
307           barray -=bs2;
308         }
309 
310         if (in[j] >= cstart && in[j] < cend){
311           col  = in[j] - cstart;
312           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
313         }
314         else if (in[j] < 0) continue;
315 #if defined(PETSC_USE_DEBUG)
316         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
317 #endif
318         else {
319           if (mat->was_assembled) {
320             if (!baij->colmap) {
321               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
322             }
323 
324 #if defined(PETSC_USE_DEBUG)
325 #if defined (PETSC_USE_CTABLE)
326             { PetscInt data;
327               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
328               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
329             }
330 #else
331             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
332 #endif
333 #endif
334 #if defined (PETSC_USE_CTABLE)
335 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
336             col  = (col - 1)/bs;
337 #else
338             col = (baij->colmap[in[j]] - 1)/bs;
339 #endif
340             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
341               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
342               col =  in[j];
343             }
344           }
345           else col = in[j];
346           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
347         }
348       }
349     } else {
350       if (!baij->donotstash) {
351         if (roworiented) {
352           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
353         } else {
354           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
355         }
356       }
357     }
358   }
359   PetscFunctionReturn(0);
360 }
361 
362 #undef __FUNCT__
363 #define __FUNCT__ "MatGetValues_MPISBAIJ"
364 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
365 {
366   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
367   PetscErrorCode ierr;
368   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
369   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
370 
371   PetscFunctionBegin;
372   for (i=0; i<m; i++) {
373     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
374     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
375     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
376       row = idxm[i] - bsrstart;
377       for (j=0; j<n; j++) {
378         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
379         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
380         if (idxn[j] >= bscstart && idxn[j] < bscend){
381           col = idxn[j] - bscstart;
382           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
383         } else {
384           if (!baij->colmap) {
385             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
386           }
387 #if defined (PETSC_USE_CTABLE)
388           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
389           data --;
390 #else
391           data = baij->colmap[idxn[j]/bs]-1;
392 #endif
393           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
394           else {
395             col  = data + idxn[j]%bs;
396             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
397           }
398         }
399       }
400     } else {
401       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
402     }
403   }
404  PetscFunctionReturn(0);
405 }
406 
407 #undef __FUNCT__
408 #define __FUNCT__ "MatNorm_MPISBAIJ"
409 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
410 {
411   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscReal      sum[2],*lnorm2;
414 
415   PetscFunctionBegin;
416   if (baij->size == 1) {
417     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
418   } else {
419     if (type == NORM_FROBENIUS) {
420       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
421       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
422       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
423       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
424       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
425       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
426       *norm = sqrt(sum[0] + 2*sum[1]);
427       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
428     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
429       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
430       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
431       PetscReal    *rsum,*rsum2,vabs;
432       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
433       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
434       MatScalar    *v;
435 
436       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
437       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
438       /* Amat */
439       v = amat->a; jj = amat->j;
440       for (brow=0; brow<mbs; brow++) {
441         grow = bs*(rstart + brow);
442         nz = amat->i[brow+1] - amat->i[brow];
443         for (bcol=0; bcol<nz; bcol++){
444           gcol = bs*(rstart + *jj); jj++;
445           for (col=0; col<bs; col++){
446             for (row=0; row<bs; row++){
447               vabs = PetscAbsScalar(*v); v++;
448               rsum[gcol+col] += vabs;
449               /* non-diagonal block */
450               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
451             }
452           }
453         }
454       }
455       /* Bmat */
456       v = bmat->a; jj = bmat->j;
457       for (brow=0; brow<mbs; brow++) {
458         grow = bs*(rstart + brow);
459         nz = bmat->i[brow+1] - bmat->i[brow];
460         for (bcol=0; bcol<nz; bcol++){
461           gcol = bs*garray[*jj]; jj++;
462           for (col=0; col<bs; col++){
463             for (row=0; row<bs; row++){
464               vabs = PetscAbsScalar(*v); v++;
465               rsum[gcol+col] += vabs;
466               rsum[grow+row] += vabs;
467             }
468           }
469         }
470       }
471       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
472       *norm = 0.0;
473       for (col=0; col<mat->cmap->N; col++) {
474         if (rsum2[col] > *norm) *norm = rsum2[col];
475       }
476       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
477     } else {
478       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
479     }
480   }
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
486 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
487 {
488   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
489   PetscErrorCode ierr;
490   PetscInt       nstash,reallocs;
491   InsertMode     addv;
492 
493   PetscFunctionBegin;
494   if (baij->donotstash) {
495     PetscFunctionReturn(0);
496   }
497 
498   /* make sure all processors are either in INSERTMODE or ADDMODE */
499   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
500   if (addv == (ADD_VALUES|INSERT_VALUES)) {
501     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
502   }
503   mat->insertmode = addv; /* in case this processor had no cache */
504 
505   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
506   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
507   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
508   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
509   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
510   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
511   PetscFunctionReturn(0);
512 }
513 
514 #undef __FUNCT__
515 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
516 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
517 {
518   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
519   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
520   PetscErrorCode ierr;
521   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
522   PetscInt       *row,*col;
523   PetscTruth     other_disassembled;
524   PetscMPIInt    n;
525   PetscTruth     r1,r2,r3;
526   MatScalar      *val;
527   InsertMode     addv = mat->insertmode;
528 
529   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
530   PetscFunctionBegin;
531 
532   if (!baij->donotstash) {
533     while (1) {
534       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
535       if (!flg) break;
536 
537       for (i=0; i<n;) {
538         /* Now identify the consecutive vals belonging to the same row */
539         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
540         if (j < n) ncols = j-i;
541         else       ncols = n-i;
542         /* Now assemble all these values with a single function call */
543         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
544         i = j;
545       }
546     }
547     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
548     /* Now process the block-stash. Since the values are stashed column-oriented,
549        set the roworiented flag to column oriented, and after MatSetValues()
550        restore the original flags */
551     r1 = baij->roworiented;
552     r2 = a->roworiented;
553     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
554     baij->roworiented = PETSC_FALSE;
555     a->roworiented    = PETSC_FALSE;
556     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworinted */
557     while (1) {
558       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
559       if (!flg) break;
560 
561       for (i=0; i<n;) {
562         /* Now identify the consecutive vals belonging to the same row */
563         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
564         if (j < n) ncols = j-i;
565         else       ncols = n-i;
566         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
567         i = j;
568       }
569     }
570     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
571     baij->roworiented = r1;
572     a->roworiented    = r2;
573     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworinted */
574   }
575 
576   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
577   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
578 
579   /* determine if any processor has disassembled, if so we must
580      also disassemble ourselfs, in order that we may reassemble. */
581   /*
582      if nonzero structure of submatrix B cannot change then we know that
583      no processor disassembled thus we can skip this stuff
584   */
585   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
586     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
587     if (mat->was_assembled && !other_disassembled) {
588       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
589     }
590   }
591 
592   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
593     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
594   }
595   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
596   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
597   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
598 
599   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
600   baij->rowvalues = 0;
601 
602   PetscFunctionReturn(0);
603 }
604 
605 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
606 #undef __FUNCT__
607 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
608 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
609 {
610   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
611   PetscErrorCode    ierr;
612   PetscInt          bs = mat->rmap->bs;
613   PetscMPIInt       size = baij->size,rank = baij->rank;
614   PetscTruth        iascii,isdraw;
615   PetscViewer       sviewer;
616   PetscViewerFormat format;
617 
618   PetscFunctionBegin;
619   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
620   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
621   if (iascii) {
622     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
623     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
624       MatInfo info;
625       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
626       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
628              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
629       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
630       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
631       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
632       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
633       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
634       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
635       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
636       PetscFunctionReturn(0);
637     } else if (format == PETSC_VIEWER_ASCII_INFO) {
638       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
639       PetscFunctionReturn(0);
640     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
641       PetscFunctionReturn(0);
642     }
643   }
644 
645   if (isdraw) {
646     PetscDraw  draw;
647     PetscTruth isnull;
648     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
649     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
650   }
651 
652   if (size == 1) {
653     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
654     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
655   } else {
656     /* assemble the entire matrix onto first processor. */
657     Mat          A;
658     Mat_SeqSBAIJ *Aloc;
659     Mat_SeqBAIJ  *Bloc;
660     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
661     MatScalar    *a;
662 
663     /* Should this be the same type as mat? */
664     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
665     if (!rank) {
666       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
667     } else {
668       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
669     }
670     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
671     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
672     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
673 
674     /* copy over the A part */
675     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
676     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
677     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
678 
679     for (i=0; i<mbs; i++) {
680       rvals[0] = bs*(baij->rstartbs + i);
681       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
682       for (j=ai[i]; j<ai[i+1]; j++) {
683         col = (baij->cstartbs+aj[j])*bs;
684         for (k=0; k<bs; k++) {
685           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
686           col++; a += bs;
687         }
688       }
689     }
690     /* copy over the B part */
691     Bloc = (Mat_SeqBAIJ*)baij->B->data;
692     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
693     for (i=0; i<mbs; i++) {
694 
695       rvals[0] = bs*(baij->rstartbs + i);
696       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
697       for (j=ai[i]; j<ai[i+1]; j++) {
698         col = baij->garray[aj[j]]*bs;
699         for (k=0; k<bs; k++) {
700           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
701           col++; a += bs;
702         }
703       }
704     }
705     ierr = PetscFree(rvals);CHKERRQ(ierr);
706     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
707     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     /*
709        Everyone has to call to draw the matrix since the graphics waits are
710        synchronized across all processors that share the PetscDraw object
711     */
712     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
713     if (!rank) {
714       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
715       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
716     }
717     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
718     ierr = MatDestroy(A);CHKERRQ(ierr);
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatView_MPISBAIJ"
725 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
726 {
727   PetscErrorCode ierr;
728   PetscTruth     iascii,isdraw,issocket,isbinary;
729 
730   PetscFunctionBegin;
731   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
732   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
733   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
734   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
735   if (iascii || isdraw || issocket || isbinary) {
736     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
737   } else {
738     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatDestroy_MPISBAIJ"
745 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
746 {
747   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751 #if defined(PETSC_USE_LOG)
752   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
753 #endif
754   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
755   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
756   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
757   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
758 #if defined (PETSC_USE_CTABLE)
759   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
760 #else
761   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
762 #endif
763   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
764   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
765   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
766   if (baij->slvec0) {
767     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
768     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
769   }
770   if (baij->slvec1) {
771     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
772     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
773     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
774   }
775   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
776   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
777   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
778   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
779   if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
780   if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
781   if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
782 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
783   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
784 #endif
785   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
786   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
787   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
788   ierr = PetscFree(baij);CHKERRQ(ierr);
789 
790   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
794   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
800 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
801 {
802   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
805   PetscScalar    *x,*from;
806 
807   PetscFunctionBegin;
808   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
809   if (nt != A->cmap->n) {
810     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
811   }
812 
813   /* diagonal part */
814   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
815   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
816 
817   /* subdiagonal part */
818   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
819 
820   /* copy x into the vec slvec0 */
821   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
822   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
823 
824   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
825   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
826   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
827 
828   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
830   /* supperdiagonal part */
831   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
832   PetscFunctionReturn(0);
833 }
834 
835 #undef __FUNCT__
836 #define __FUNCT__ "MatMult_MPISBAIJ"
837 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
838 {
839   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
840   PetscErrorCode ierr;
841   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
842   PetscScalar    *x,*from;
843 
844   PetscFunctionBegin;
845   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
846   if (nt != A->cmap->n) {
847     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
848   }
849 
850   /* diagonal part */
851   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
852   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
853 
854   /* subdiagonal part */
855   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
856 
857   /* copy x into the vec slvec0 */
858   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
859   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
860 
861   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
862   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
863   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
864 
865   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
866   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   /* supperdiagonal part */
868   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
869   PetscFunctionReturn(0);
870 }
871 
872 #undef __FUNCT__
873 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
874 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
875 {
876   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
877   PetscErrorCode ierr;
878   PetscInt       nt;
879 
880   PetscFunctionBegin;
881   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
882   if (nt != A->cmap->n) {
883     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
884   }
885   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
886   if (nt != A->rmap->N) {
887     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
888   }
889 
890   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
891   /* do diagonal part */
892   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
893   /* do supperdiagonal part */
894   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
896   /* do subdiagonal part */
897   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
898   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
899   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
900 
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
906 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
907 {
908   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
909   PetscErrorCode ierr;
910   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
911   PetscScalar    *x,*from,zero=0.0;
912 
913   PetscFunctionBegin;
914   /*
915   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
916   PetscSynchronizedFlush(((PetscObject)A)->comm);
917   */
918   /* diagonal part */
919   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
920   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
921 
922   /* subdiagonal part */
923   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
924 
925   /* copy x into the vec slvec0 */
926   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
927   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
928   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
929   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
930 
931   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
932   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
933   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
934 
935   /* supperdiagonal part */
936   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
937 
938   PetscFunctionReturn(0);
939 }
940 
941 #undef __FUNCT__
942 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
943 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
944 {
945   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
946   PetscErrorCode ierr;
947 
948   PetscFunctionBegin;
949   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950   /* do diagonal part */
951   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
952   /* do supperdiagonal part */
953   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
954   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
955 
956   /* do subdiagonal part */
957   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
958   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
959   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
960 
961   PetscFunctionReturn(0);
962 }
963 
964 /*
965   This only works correctly for square matrices where the subblock A->A is the
966    diagonal block
967 */
968 #undef __FUNCT__
969 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
970 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
971 {
972   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
973   PetscErrorCode ierr;
974 
975   PetscFunctionBegin;
976   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
977   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatScale_MPISBAIJ"
983 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
984 {
985   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
990   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
991   PetscFunctionReturn(0);
992 }
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MatGetRow_MPISBAIJ"
996 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
997 {
998   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
999   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1000   PetscErrorCode ierr;
1001   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1002   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1003   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1004 
1005   PetscFunctionBegin;
1006   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1007   mat->getrowactive = PETSC_TRUE;
1008 
1009   if (!mat->rowvalues && (idx || v)) {
1010     /*
1011         allocate enough space to hold information from the longest row.
1012     */
1013     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1014     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1015     PetscInt     max = 1,mbs = mat->mbs,tmp;
1016     for (i=0; i<mbs; i++) {
1017       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1018       if (max < tmp) { max = tmp; }
1019     }
1020     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1021   }
1022 
1023   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1024   lrow = row - brstart;  /* local row index */
1025 
1026   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1027   if (!v)   {pvA = 0; pvB = 0;}
1028   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1029   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1030   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1031   nztot = nzA + nzB;
1032 
1033   cmap  = mat->garray;
1034   if (v  || idx) {
1035     if (nztot) {
1036       /* Sort by increasing column numbers, assuming A and B already sorted */
1037       PetscInt imark = -1;
1038       if (v) {
1039         *v = v_p = mat->rowvalues;
1040         for (i=0; i<nzB; i++) {
1041           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1042           else break;
1043         }
1044         imark = i;
1045         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1046         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1047       }
1048       if (idx) {
1049         *idx = idx_p = mat->rowindices;
1050         if (imark > -1) {
1051           for (i=0; i<imark; i++) {
1052             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1053           }
1054         } else {
1055           for (i=0; i<nzB; i++) {
1056             if (cmap[cworkB[i]/bs] < cstart)
1057               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1058             else break;
1059           }
1060           imark = i;
1061         }
1062         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1063         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1064       }
1065     } else {
1066       if (idx) *idx = 0;
1067       if (v)   *v   = 0;
1068     }
1069   }
1070   *nz = nztot;
1071   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1072   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1078 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1079 {
1080   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1081 
1082   PetscFunctionBegin;
1083   if (!baij->getrowactive) {
1084     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1085   }
1086   baij->getrowactive = PETSC_FALSE;
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1092 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1093 {
1094   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1095   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1096 
1097   PetscFunctionBegin;
1098   aA->getrow_utriangular = PETSC_TRUE;
1099   PetscFunctionReturn(0);
1100 }
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1103 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1104 {
1105   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1106   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1107 
1108   PetscFunctionBegin;
1109   aA->getrow_utriangular = PETSC_FALSE;
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1115 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1116 {
1117   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1118   PetscErrorCode ierr;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1122   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1128 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1129 {
1130   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1131   PetscErrorCode ierr;
1132 
1133   PetscFunctionBegin;
1134   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1135   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1141 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1142 {
1143   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1144   PetscErrorCode ierr;
1145 
1146   PetscFunctionBegin;
1147   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1148   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1154 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1155 {
1156   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1157   Mat            A = a->A,B = a->B;
1158   PetscErrorCode ierr;
1159   PetscReal      isend[5],irecv[5];
1160 
1161   PetscFunctionBegin;
1162   info->block_size     = (PetscReal)matin->rmap->bs;
1163   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1164   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1165   isend[3] = info->memory;  isend[4] = info->mallocs;
1166   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1167   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1168   isend[3] += info->memory;  isend[4] += info->mallocs;
1169   if (flag == MAT_LOCAL) {
1170     info->nz_used      = isend[0];
1171     info->nz_allocated = isend[1];
1172     info->nz_unneeded  = isend[2];
1173     info->memory       = isend[3];
1174     info->mallocs      = isend[4];
1175   } else if (flag == MAT_GLOBAL_MAX) {
1176     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1177     info->nz_used      = irecv[0];
1178     info->nz_allocated = irecv[1];
1179     info->nz_unneeded  = irecv[2];
1180     info->memory       = irecv[3];
1181     info->mallocs      = irecv[4];
1182   } else if (flag == MAT_GLOBAL_SUM) {
1183     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1184     info->nz_used      = irecv[0];
1185     info->nz_allocated = irecv[1];
1186     info->nz_unneeded  = irecv[2];
1187     info->memory       = irecv[3];
1188     info->mallocs      = irecv[4];
1189   } else {
1190     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1191   }
1192   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1193   info->fill_ratio_needed = 0;
1194   info->factor_mallocs    = 0;
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1200 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
1201 {
1202   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1203   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   switch (op) {
1208   case MAT_NEW_NONZERO_LOCATIONS:
1209   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1210   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1211   case MAT_KEEP_NONZERO_PATTERN:
1212   case MAT_NEW_NONZERO_LOCATION_ERR:
1213     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1214     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1215     break;
1216   case MAT_ROW_ORIENTED:
1217     a->roworiented = flg;
1218     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1219     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1220     break;
1221   case MAT_NEW_DIAGONALS:
1222     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1223     break;
1224   case MAT_IGNORE_OFF_PROC_ENTRIES:
1225     a->donotstash = flg;
1226     break;
1227   case MAT_USE_HASH_TABLE:
1228     a->ht_flag = flg;
1229     break;
1230   case MAT_HERMITIAN:
1231     if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1232     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1233     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1234     break;
1235   case MAT_SYMMETRIC:
1236     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1237     break;
1238   case MAT_STRUCTURALLY_SYMMETRIC:
1239     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1240     break;
1241   case MAT_SYMMETRY_ETERNAL:
1242     if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1243     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1244     break;
1245   case MAT_IGNORE_LOWER_TRIANGULAR:
1246     aA->ignore_ltriangular = flg;
1247     break;
1248   case MAT_ERROR_LOWER_TRIANGULAR:
1249     aA->ignore_ltriangular = flg;
1250     break;
1251   case MAT_GETROW_UPPERTRIANGULAR:
1252     aA->getrow_utriangular = flg;
1253     break;
1254   default:
1255     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1262 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1263 {
1264   PetscErrorCode ierr;
1265   PetscFunctionBegin;
1266   if (MAT_INITIAL_MATRIX || *B != A) {
1267     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1268   }
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1274 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1275 {
1276   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1277   Mat            a=baij->A, b=baij->B;
1278   PetscErrorCode ierr;
1279   PetscInt       nv,m,n;
1280   PetscTruth     flg;
1281 
1282   PetscFunctionBegin;
1283   if (ll != rr){
1284     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1285     if (!flg)
1286       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1287   }
1288   if (!ll) PetscFunctionReturn(0);
1289 
1290   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1291   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1292 
1293   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1294   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1295 
1296   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1297 
1298   /* left diagonalscale the off-diagonal part */
1299   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1300 
1301   /* scale the diagonal part */
1302   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1303 
1304   /* right diagonalscale the off-diagonal part */
1305   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1306   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1312 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1313 {
1314   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1315   PetscErrorCode ierr;
1316 
1317   PetscFunctionBegin;
1318   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatEqual_MPISBAIJ"
1326 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1327 {
1328   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1329   Mat            a,b,c,d;
1330   PetscTruth     flg;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   a = matA->A; b = matA->B;
1335   c = matB->A; d = matB->B;
1336 
1337   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1338   if (flg) {
1339     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1340   }
1341   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatCopy_MPISBAIJ"
1347 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1348 {
1349   PetscErrorCode ierr;
1350   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1351   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1352 
1353   PetscFunctionBegin;
1354   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1355   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1356     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1357     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1358     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1359   } else {
1360     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1361     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1362   }
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1368 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1369 {
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1379 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1380 {
1381   PetscErrorCode ierr;
1382   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1383   PetscBLASInt   bnz,one=1;
1384   Mat_SeqSBAIJ   *xa,*ya;
1385   Mat_SeqBAIJ    *xb,*yb;
1386 
1387   PetscFunctionBegin;
1388   if (str == SAME_NONZERO_PATTERN) {
1389     PetscScalar alpha = a;
1390     xa = (Mat_SeqSBAIJ *)xx->A->data;
1391     ya = (Mat_SeqSBAIJ *)yy->A->data;
1392     bnz = PetscBLASIntCast(xa->nz);
1393     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1394     xb = (Mat_SeqBAIJ *)xx->B->data;
1395     yb = (Mat_SeqBAIJ *)yy->B->data;
1396     bnz = PetscBLASIntCast(xb->nz);
1397     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1398   } else {
1399     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1400     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1401     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1408 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1409 {
1410   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1411   PetscInt        rbs,cbs;
1412   PetscErrorCode  ierr;
1413 
1414   PetscFunctionBegin;
1415   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1416   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1417   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1418   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1419   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs);
1420   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1426 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1427 {
1428   PetscErrorCode ierr;
1429   PetscInt       i;
1430   PetscTruth     flg;
1431 
1432   PetscFunctionBegin;
1433   for (i=0; i<n; i++) {
1434     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1435     if (!flg) {
1436       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1437     }
1438   }
1439   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1440   PetscFunctionReturn(0);
1441 }
1442 
1443 
1444 /* -------------------------------------------------------------------*/
1445 static struct _MatOps MatOps_Values = {
1446        MatSetValues_MPISBAIJ,
1447        MatGetRow_MPISBAIJ,
1448        MatRestoreRow_MPISBAIJ,
1449        MatMult_MPISBAIJ,
1450 /* 4*/ MatMultAdd_MPISBAIJ,
1451        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1452        MatMultAdd_MPISBAIJ,
1453        0,
1454        0,
1455        0,
1456 /*10*/ 0,
1457        0,
1458        0,
1459        MatSOR_MPISBAIJ,
1460        MatTranspose_MPISBAIJ,
1461 /*15*/ MatGetInfo_MPISBAIJ,
1462        MatEqual_MPISBAIJ,
1463        MatGetDiagonal_MPISBAIJ,
1464        MatDiagonalScale_MPISBAIJ,
1465        MatNorm_MPISBAIJ,
1466 /*20*/ MatAssemblyBegin_MPISBAIJ,
1467        MatAssemblyEnd_MPISBAIJ,
1468        MatSetOption_MPISBAIJ,
1469        MatZeroEntries_MPISBAIJ,
1470 /*24*/ 0,
1471        0,
1472        0,
1473        0,
1474        0,
1475 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1476        0,
1477        0,
1478        0,
1479        0,
1480 /*34*/ MatDuplicate_MPISBAIJ,
1481        0,
1482        0,
1483        0,
1484        0,
1485 /*39*/ MatAXPY_MPISBAIJ,
1486        MatGetSubMatrices_MPISBAIJ,
1487        MatIncreaseOverlap_MPISBAIJ,
1488        MatGetValues_MPISBAIJ,
1489        MatCopy_MPISBAIJ,
1490 /*44*/ 0,
1491        MatScale_MPISBAIJ,
1492        0,
1493        0,
1494        0,
1495 /*49*/ MatSetBlockSize_MPISBAIJ,
1496        0,
1497        0,
1498        0,
1499        0,
1500 /*54*/ 0,
1501        0,
1502        MatSetUnfactored_MPISBAIJ,
1503        0,
1504        MatSetValuesBlocked_MPISBAIJ,
1505 /*59*/ 0,
1506        0,
1507        0,
1508        0,
1509        0,
1510 /*64*/ 0,
1511        0,
1512        0,
1513        0,
1514        0,
1515 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1516        0,
1517        0,
1518        0,
1519        0,
1520 /*74*/ 0,
1521        0,
1522        0,
1523        0,
1524        0,
1525 /*79*/ 0,
1526        0,
1527        0,
1528        0,
1529        MatLoad_MPISBAIJ,
1530 /*84*/ 0,
1531        0,
1532        0,
1533        0,
1534        0,
1535 /*89*/ 0,
1536        0,
1537        0,
1538        0,
1539        0,
1540 /*94*/ 0,
1541        0,
1542        0,
1543        0,
1544        0,
1545 /*99*/ 0,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*104*/0,
1551        MatRealPart_MPISBAIJ,
1552        MatImaginaryPart_MPISBAIJ,
1553        MatGetRowUpperTriangular_MPISBAIJ,
1554        MatRestoreRowUpperTriangular_MPISBAIJ
1555 };
1556 
1557 
1558 EXTERN_C_BEGIN
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1561 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1562 {
1563   PetscFunctionBegin;
1564   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1565   *iscopy = PETSC_FALSE;
1566   PetscFunctionReturn(0);
1567 }
1568 EXTERN_C_END
1569 
1570 EXTERN_C_BEGIN
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1573 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1574 {
1575   Mat_MPISBAIJ   *b;
1576   PetscErrorCode ierr;
1577   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1578 
1579   PetscFunctionBegin;
1580   if (bs < 0){
1581     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1582       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1583     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1584     bs   = PetscAbs(bs);
1585   }
1586   if ((d_nnz || o_nnz) && newbs != bs) {
1587     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1588   }
1589   bs = newbs;
1590 
1591   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1592   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1593   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1594   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1595 
1596   B->rmap->bs = B->cmap->bs = bs;
1597   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1598   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1599 
1600   if (d_nnz) {
1601     for (i=0; i<B->rmap->n/bs; i++) {
1602       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1603     }
1604   }
1605   if (o_nnz) {
1606     for (i=0; i<B->rmap->n/bs; i++) {
1607       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1608     }
1609   }
1610 
1611   b   = (Mat_MPISBAIJ*)B->data;
1612   mbs = B->rmap->n/bs;
1613   Mbs = B->rmap->N/bs;
1614   if (mbs*bs != B->rmap->n) {
1615     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1616   }
1617 
1618   B->rmap->bs  = bs;
1619   b->bs2 = bs*bs;
1620   b->mbs = mbs;
1621   b->nbs = mbs;
1622   b->Mbs = Mbs;
1623   b->Nbs = Mbs;
1624 
1625   for (i=0; i<=b->size; i++) {
1626     b->rangebs[i] = B->rmap->range[i]/bs;
1627   }
1628   b->rstartbs = B->rmap->rstart/bs;
1629   b->rendbs   = B->rmap->rend/bs;
1630 
1631   b->cstartbs = B->cmap->rstart/bs;
1632   b->cendbs   = B->cmap->rend/bs;
1633 
1634   if (!B->preallocated) {
1635     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1636     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1637     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1638     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1639     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1640     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1641     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1642     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1643     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1644   }
1645 
1646   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1647   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1648   B->preallocated = PETSC_TRUE;
1649   PetscFunctionReturn(0);
1650 }
1651 EXTERN_C_END
1652 
1653 EXTERN_C_BEGIN
1654 #if defined(PETSC_HAVE_MUMPS)
1655 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_mumps(Mat,MatFactorType,Mat*);
1656 #endif
1657 #if defined(PETSC_HAVE_SPOOLES)
1658 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1659 #endif
1660 #if defined(PETSC_HAVE_PASTIX)
1661 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1662 #endif
1663 EXTERN_C_END
1664 
1665 /*MC
1666    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1667    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1668 
1669    Options Database Keys:
1670 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1671 
1672   Level: beginner
1673 
1674 .seealso: MatCreateMPISBAIJ
1675 M*/
1676 
1677 EXTERN_C_BEGIN
1678 #undef __FUNCT__
1679 #define __FUNCT__ "MatCreate_MPISBAIJ"
1680 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1681 {
1682   Mat_MPISBAIJ   *b;
1683   PetscErrorCode ierr;
1684   PetscTruth     flg;
1685 
1686   PetscFunctionBegin;
1687 
1688   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1689   B->data = (void*)b;
1690   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1691 
1692   B->ops->destroy    = MatDestroy_MPISBAIJ;
1693   B->ops->view       = MatView_MPISBAIJ;
1694   B->mapping         = 0;
1695   B->assembled       = PETSC_FALSE;
1696 
1697   B->insertmode = NOT_SET_VALUES;
1698   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1699   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1700 
1701   /* build local table of row and column ownerships */
1702   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1703 
1704   /* build cache for off array entries formed */
1705   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1706   b->donotstash  = PETSC_FALSE;
1707   b->colmap      = PETSC_NULL;
1708   b->garray      = PETSC_NULL;
1709   b->roworiented = PETSC_TRUE;
1710 
1711   /* stuff used in block assembly */
1712   b->barray       = 0;
1713 
1714   /* stuff used for matrix vector multiply */
1715   b->lvec         = 0;
1716   b->Mvctx        = 0;
1717   b->slvec0       = 0;
1718   b->slvec0b      = 0;
1719   b->slvec1       = 0;
1720   b->slvec1a      = 0;
1721   b->slvec1b      = 0;
1722   b->sMvctx       = 0;
1723 
1724   /* stuff for MatGetRow() */
1725   b->rowindices   = 0;
1726   b->rowvalues    = 0;
1727   b->getrowactive = PETSC_FALSE;
1728 
1729   /* hash table stuff */
1730   b->ht           = 0;
1731   b->hd           = 0;
1732   b->ht_size      = 0;
1733   b->ht_flag      = PETSC_FALSE;
1734   b->ht_fact      = 0;
1735   b->ht_total_ct  = 0;
1736   b->ht_insert_ct = 0;
1737 
1738   b->in_loc       = 0;
1739   b->v_loc        = 0;
1740   b->n_loc        = 0;
1741   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1742     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1743     if (flg) {
1744       PetscReal fact = 1.39;
1745       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1746       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1747       if (fact <= 1.0) fact = 1.39;
1748       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1749       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1750     }
1751   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1752 
1753 #if defined(PETSC_HAVE_PASTIX)
1754   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1755 					   "MatGetFactor_mpisbaij_pastix",
1756 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1757 #endif
1758 #if defined(PETSC_HAVE_MUMPS)
1759   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1760                                      "MatGetFactor_mpisbaij_mumps",
1761                                      MatGetFactor_mpisbaij_mumps);CHKERRQ(ierr);
1762 #endif
1763 #if defined(PETSC_HAVE_SPOOLES)
1764   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1765                                      "MatGetFactor_mpisbaij_spooles",
1766                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1767 #endif
1768   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1769                                      "MatStoreValues_MPISBAIJ",
1770                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1772                                      "MatRetrieveValues_MPISBAIJ",
1773                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1774   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1775                                      "MatGetDiagonalBlock_MPISBAIJ",
1776                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1777   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1778                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1779                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1780   B->symmetric                  = PETSC_TRUE;
1781   B->structurally_symmetric     = PETSC_TRUE;
1782   B->symmetric_set              = PETSC_TRUE;
1783   B->structurally_symmetric_set = PETSC_TRUE;
1784   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1785   PetscFunctionReturn(0);
1786 }
1787 EXTERN_C_END
1788 
1789 /*MC
1790    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1791 
1792    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1793    and MATMPISBAIJ otherwise.
1794 
1795    Options Database Keys:
1796 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1797 
1798   Level: beginner
1799 
1800 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1801 M*/
1802 
1803 EXTERN_C_BEGIN
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatCreate_SBAIJ"
1806 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1807 {
1808   PetscErrorCode ierr;
1809   PetscMPIInt    size;
1810 
1811   PetscFunctionBegin;
1812   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1813   if (size == 1) {
1814     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1815   } else {
1816     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1817   }
1818   PetscFunctionReturn(0);
1819 }
1820 EXTERN_C_END
1821 
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1824 /*@C
1825    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1826    the user should preallocate the matrix storage by setting the parameters
1827    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1828    performance can be increased by more than a factor of 50.
1829 
1830    Collective on Mat
1831 
1832    Input Parameters:
1833 +  A - the matrix
1834 .  bs   - size of blockk
1835 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1836            submatrix  (same for all local rows)
1837 .  d_nnz - array containing the number of block nonzeros in the various block rows
1838            in the upper triangular and diagonal part of the in diagonal portion of the local
1839            (possibly different for each block row) or PETSC_NULL.  You must leave room
1840            for the diagonal entry even if it is zero.
1841 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1842            submatrix (same for all local rows).
1843 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1844            off-diagonal portion of the local submatrix (possibly different for
1845            each block row) or PETSC_NULL.
1846 
1847 
1848    Options Database Keys:
1849 .   -mat_no_unroll - uses code that does not unroll the loops in the
1850                      block calculations (much slower)
1851 .   -mat_block_size - size of the blocks to use
1852 
1853    Notes:
1854 
1855    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1856    than it must be used on all processors that share the object for that argument.
1857 
1858    If the *_nnz parameter is given then the *_nz parameter is ignored
1859 
1860    Storage Information:
1861    For a square global matrix we define each processor's diagonal portion
1862    to be its local rows and the corresponding columns (a square submatrix);
1863    each processor's off-diagonal portion encompasses the remainder of the
1864    local matrix (a rectangular submatrix).
1865 
1866    The user can specify preallocated storage for the diagonal part of
1867    the local submatrix with either d_nz or d_nnz (not both).  Set
1868    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1869    memory allocation.  Likewise, specify preallocated storage for the
1870    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1871 
1872    You can call MatGetInfo() to get information on how effective the preallocation was;
1873    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1874    You can also run with the option -info and look for messages with the string
1875    malloc in them to see if additional memory allocation was needed.
1876 
1877    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1878    the figure below we depict these three local rows and all columns (0-11).
1879 
1880 .vb
1881            0 1 2 3 4 5 6 7 8 9 10 11
1882           -------------------
1883    row 3  |  o o o d d d o o o o o o
1884    row 4  |  o o o d d d o o o o o o
1885    row 5  |  o o o d d d o o o o o o
1886           -------------------
1887 .ve
1888 
1889    Thus, any entries in the d locations are stored in the d (diagonal)
1890    submatrix, and any entries in the o locations are stored in the
1891    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1892    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1893 
1894    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1895    plus the diagonal part of the d matrix,
1896    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1897    In general, for PDE problems in which most nonzeros are near the diagonal,
1898    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1899    or you will get TERRIBLE performance; see the users' manual chapter on
1900    matrices.
1901 
1902    Level: intermediate
1903 
1904 .keywords: matrix, block, aij, compressed row, sparse, parallel
1905 
1906 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1907 @*/
1908 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1909 {
1910   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1911 
1912   PetscFunctionBegin;
1913   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1914   if (f) {
1915     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1916   }
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatCreateMPISBAIJ"
1922 /*@C
1923    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1924    (block compressed row).  For good matrix assembly performance
1925    the user should preallocate the matrix storage by setting the parameters
1926    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1927    performance can be increased by more than a factor of 50.
1928 
1929    Collective on MPI_Comm
1930 
1931    Input Parameters:
1932 +  comm - MPI communicator
1933 .  bs   - size of blockk
1934 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1935            This value should be the same as the local size used in creating the
1936            y vector for the matrix-vector product y = Ax.
1937 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1938            This value should be the same as the local size used in creating the
1939            x vector for the matrix-vector product y = Ax.
1940 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1941 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1942 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1943            submatrix  (same for all local rows)
1944 .  d_nnz - array containing the number of block nonzeros in the various block rows
1945            in the upper triangular portion of the in diagonal portion of the local
1946            (possibly different for each block block row) or PETSC_NULL.
1947            You must leave room for the diagonal entry even if it is zero.
1948 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1949            submatrix (same for all local rows).
1950 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1951            off-diagonal portion of the local submatrix (possibly different for
1952            each block row) or PETSC_NULL.
1953 
1954    Output Parameter:
1955 .  A - the matrix
1956 
1957    Options Database Keys:
1958 .   -mat_no_unroll - uses code that does not unroll the loops in the
1959                      block calculations (much slower)
1960 .   -mat_block_size - size of the blocks to use
1961 .   -mat_mpi - use the parallel matrix data structures even on one processor
1962                (defaults to using SeqBAIJ format on one processor)
1963 
1964    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1965    MatXXXXSetPreallocation() paradgm instead of this routine directly.
1966    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1967 
1968    Notes:
1969    The number of rows and columns must be divisible by blocksize.
1970    This matrix type does not support complex Hermitian operation.
1971 
1972    The user MUST specify either the local or global matrix dimensions
1973    (possibly both).
1974 
1975    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1976    than it must be used on all processors that share the object for that argument.
1977 
1978    If the *_nnz parameter is given then the *_nz parameter is ignored
1979 
1980    Storage Information:
1981    For a square global matrix we define each processor's diagonal portion
1982    to be its local rows and the corresponding columns (a square submatrix);
1983    each processor's off-diagonal portion encompasses the remainder of the
1984    local matrix (a rectangular submatrix).
1985 
1986    The user can specify preallocated storage for the diagonal part of
1987    the local submatrix with either d_nz or d_nnz (not both).  Set
1988    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1989    memory allocation.  Likewise, specify preallocated storage for the
1990    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1991 
1992    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1993    the figure below we depict these three local rows and all columns (0-11).
1994 
1995 .vb
1996            0 1 2 3 4 5 6 7 8 9 10 11
1997           -------------------
1998    row 3  |  o o o d d d o o o o o o
1999    row 4  |  o o o d d d o o o o o o
2000    row 5  |  o o o d d d o o o o o o
2001           -------------------
2002 .ve
2003 
2004    Thus, any entries in the d locations are stored in the d (diagonal)
2005    submatrix, and any entries in the o locations are stored in the
2006    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2007    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2008 
2009    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2010    plus the diagonal part of the d matrix,
2011    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2012    In general, for PDE problems in which most nonzeros are near the diagonal,
2013    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2014    or you will get TERRIBLE performance; see the users' manual chapter on
2015    matrices.
2016 
2017    Level: intermediate
2018 
2019 .keywords: matrix, block, aij, compressed row, sparse, parallel
2020 
2021 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2022 @*/
2023 
2024 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2025 {
2026   PetscErrorCode ierr;
2027   PetscMPIInt    size;
2028 
2029   PetscFunctionBegin;
2030   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2031   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2032   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2033   if (size > 1) {
2034     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2035     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2036   } else {
2037     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2038     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2039   }
2040   PetscFunctionReturn(0);
2041 }
2042 
2043 
2044 #undef __FUNCT__
2045 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2046 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2047 {
2048   Mat            mat;
2049   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2050   PetscErrorCode ierr;
2051   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2052   PetscScalar    *array;
2053 
2054   PetscFunctionBegin;
2055   *newmat       = 0;
2056   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2057   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2058   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2059   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2060   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2061   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2062 
2063   mat->factor       = matin->factor;
2064   mat->preallocated = PETSC_TRUE;
2065   mat->assembled    = PETSC_TRUE;
2066   mat->insertmode   = NOT_SET_VALUES;
2067 
2068   a = (Mat_MPISBAIJ*)mat->data;
2069   a->bs2   = oldmat->bs2;
2070   a->mbs   = oldmat->mbs;
2071   a->nbs   = oldmat->nbs;
2072   a->Mbs   = oldmat->Mbs;
2073   a->Nbs   = oldmat->Nbs;
2074 
2075 
2076   a->size         = oldmat->size;
2077   a->rank         = oldmat->rank;
2078   a->donotstash   = oldmat->donotstash;
2079   a->roworiented  = oldmat->roworiented;
2080   a->rowindices   = 0;
2081   a->rowvalues    = 0;
2082   a->getrowactive = PETSC_FALSE;
2083   a->barray       = 0;
2084   a->rstartbs    = oldmat->rstartbs;
2085   a->rendbs      = oldmat->rendbs;
2086   a->cstartbs    = oldmat->cstartbs;
2087   a->cendbs      = oldmat->cendbs;
2088 
2089   /* hash table stuff */
2090   a->ht           = 0;
2091   a->hd           = 0;
2092   a->ht_size      = 0;
2093   a->ht_flag      = oldmat->ht_flag;
2094   a->ht_fact      = oldmat->ht_fact;
2095   a->ht_total_ct  = 0;
2096   a->ht_insert_ct = 0;
2097 
2098   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2099   if (oldmat->colmap) {
2100 #if defined (PETSC_USE_CTABLE)
2101     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2102 #else
2103     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2104     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2105     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2106 #endif
2107   } else a->colmap = 0;
2108 
2109   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2110     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2111     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2112     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2113   } else a->garray = 0;
2114 
2115   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2116   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2117   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2118   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2119   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2120 
2121   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2122   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2123   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2124   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2125 
2126   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2127   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2128   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2129   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2130   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2131   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2132   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2133   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2134   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2135   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2136   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2137   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2138   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2139 
2140   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2141   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2142   a->sMvctx = oldmat->sMvctx;
2143   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2144 
2145   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2146   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2147   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2148   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2149   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2150   *newmat = mat;
2151   PetscFunctionReturn(0);
2152 }
2153 
2154 #undef __FUNCT__
2155 #define __FUNCT__ "MatLoad_MPISBAIJ"
2156 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2157 {
2158   Mat            A;
2159   PetscErrorCode ierr;
2160   PetscInt       i,nz,j,rstart,rend;
2161   PetscScalar    *vals,*buf;
2162   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2163   MPI_Status     status;
2164   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2165   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2166   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2167   PetscInt       bs=1,Mbs,mbs,extra_rows;
2168   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2169   PetscInt       dcount,kmax,k,nzcount,tmp;
2170   int            fd;
2171 
2172   PetscFunctionBegin;
2173   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2174     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2175   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2176 
2177   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2178   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2179   if (!rank) {
2180     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2181     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2182     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2183     if (header[3] < 0) {
2184       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2185     }
2186   }
2187 
2188   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2189   M = header[1]; N = header[2];
2190 
2191   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2192 
2193   /*
2194      This code adds extra rows to make sure the number of rows is
2195      divisible by the blocksize
2196   */
2197   Mbs        = M/bs;
2198   extra_rows = bs - M + bs*(Mbs);
2199   if (extra_rows == bs) extra_rows = 0;
2200   else                  Mbs++;
2201   if (extra_rows &&!rank) {
2202     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2203   }
2204 
2205   /* determine ownership of all rows */
2206   mbs        = Mbs/size + ((Mbs % size) > rank);
2207   m          = mbs*bs;
2208   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2209   mmbs       = PetscMPIIntCast(mbs);
2210   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2211   rowners[0] = 0;
2212   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2213   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2214   rstart = rowners[rank];
2215   rend   = rowners[rank+1];
2216 
2217   /* distribute row lengths to all processors */
2218   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2219   if (!rank) {
2220     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2221     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2222     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2223     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2224     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2225     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2226     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2227   } else {
2228     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2229   }
2230 
2231   if (!rank) {   /* procs[0] */
2232     /* calculate the number of nonzeros on each processor */
2233     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2234     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2235     for (i=0; i<size; i++) {
2236       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2237         procsnz[i] += rowlengths[j];
2238       }
2239     }
2240     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2241 
2242     /* determine max buffer needed and allocate it */
2243     maxnz = 0;
2244     for (i=0; i<size; i++) {
2245       maxnz = PetscMax(maxnz,procsnz[i]);
2246     }
2247     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2248 
2249     /* read in my part of the matrix column indices  */
2250     nz     = procsnz[0];
2251     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2252     mycols = ibuf;
2253     if (size == 1)  nz -= extra_rows;
2254     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2255     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2256 
2257     /* read in every ones (except the last) and ship off */
2258     for (i=1; i<size-1; i++) {
2259       nz   = procsnz[i];
2260       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2261       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2262     }
2263     /* read in the stuff for the last proc */
2264     if (size != 1) {
2265       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2266       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2267       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2268       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2269     }
2270     ierr = PetscFree(cols);CHKERRQ(ierr);
2271   } else {  /* procs[i], i>0 */
2272     /* determine buffer space needed for message */
2273     nz = 0;
2274     for (i=0; i<m; i++) {
2275       nz += locrowlens[i];
2276     }
2277     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2278     mycols = ibuf;
2279     /* receive message of column indices*/
2280     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2281     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2282     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2283   }
2284 
2285   /* loop over local rows, determining number of off diagonal entries */
2286   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2287   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2288   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2289   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2290   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2291   rowcount = 0;
2292   nzcount  = 0;
2293   for (i=0; i<mbs; i++) {
2294     dcount  = 0;
2295     odcount = 0;
2296     for (j=0; j<bs; j++) {
2297       kmax = locrowlens[rowcount];
2298       for (k=0; k<kmax; k++) {
2299         tmp = mycols[nzcount++]/bs; /* block col. index */
2300         if (!mask[tmp]) {
2301           mask[tmp] = 1;
2302           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2303           else masked1[dcount++] = tmp; /* entry in diag portion */
2304         }
2305       }
2306       rowcount++;
2307     }
2308 
2309     dlens[i]  = dcount;  /* d_nzz[i] */
2310     odlens[i] = odcount; /* o_nzz[i] */
2311 
2312     /* zero out the mask elements we set */
2313     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2314     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2315   }
2316 
2317   /* create our matrix */
2318   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2319   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2320   ierr = MatSetType(A,type);CHKERRQ(ierr);
2321   ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2322   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2323 
2324   if (!rank) {
2325     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2326     /* read in my part of the matrix numerical values  */
2327     nz = procsnz[0];
2328     vals = buf;
2329     mycols = ibuf;
2330     if (size == 1)  nz -= extra_rows;
2331     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2332     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2333 
2334     /* insert into matrix */
2335     jj      = rstart*bs;
2336     for (i=0; i<m; i++) {
2337       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2338       mycols += locrowlens[i];
2339       vals   += locrowlens[i];
2340       jj++;
2341     }
2342 
2343     /* read in other processors (except the last one) and ship out */
2344     for (i=1; i<size-1; i++) {
2345       nz   = procsnz[i];
2346       vals = buf;
2347       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2348       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2349     }
2350     /* the last proc */
2351     if (size != 1){
2352       nz   = procsnz[i] - extra_rows;
2353       vals = buf;
2354       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2355       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2356       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2357     }
2358     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2359 
2360   } else {
2361     /* receive numeric values */
2362     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2363 
2364     /* receive message of values*/
2365     vals   = buf;
2366     mycols = ibuf;
2367     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2368     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2369     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2370 
2371     /* insert into matrix */
2372     jj      = rstart*bs;
2373     for (i=0; i<m; i++) {
2374       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2375       mycols += locrowlens[i];
2376       vals   += locrowlens[i];
2377       jj++;
2378     }
2379   }
2380 
2381   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2382   ierr = PetscFree(buf);CHKERRQ(ierr);
2383   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2384   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2385   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2386   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2387   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2389   *newmat = A;
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2395 /*XXXXX@
2396    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2397 
2398    Input Parameters:
2399 .  mat  - the matrix
2400 .  fact - factor
2401 
2402    Collective on Mat
2403 
2404    Level: advanced
2405 
2406   Notes:
2407    This can also be set by the command line option: -mat_use_hash_table fact
2408 
2409 .keywords: matrix, hashtable, factor, HT
2410 
2411 .seealso: MatSetOption()
2412 @XXXXX*/
2413 
2414 
2415 #undef __FUNCT__
2416 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2417 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2418 {
2419   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2420   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2421   PetscReal      atmp;
2422   PetscReal      *work,*svalues,*rvalues;
2423   PetscErrorCode ierr;
2424   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2425   PetscMPIInt    rank,size;
2426   PetscInt       *rowners_bs,dest,count,source;
2427   PetscScalar    *va;
2428   MatScalar      *ba;
2429   MPI_Status     stat;
2430 
2431   PetscFunctionBegin;
2432   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2433   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2434   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2435 
2436   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2437   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2438 
2439   bs   = A->rmap->bs;
2440   mbs  = a->mbs;
2441   Mbs  = a->Mbs;
2442   ba   = b->a;
2443   bi   = b->i;
2444   bj   = b->j;
2445 
2446   /* find ownerships */
2447   rowners_bs = A->rmap->range;
2448 
2449   /* each proc creates an array to be distributed */
2450   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2451   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2452 
2453   /* row_max for B */
2454   if (rank != size-1){
2455     for (i=0; i<mbs; i++) {
2456       ncols = bi[1] - bi[0]; bi++;
2457       brow  = bs*i;
2458       for (j=0; j<ncols; j++){
2459         bcol = bs*(*bj);
2460         for (kcol=0; kcol<bs; kcol++){
2461           col = bcol + kcol;                 /* local col index */
2462           col += rowners_bs[rank+1];      /* global col index */
2463           for (krow=0; krow<bs; krow++){
2464             atmp = PetscAbsScalar(*ba); ba++;
2465             row = brow + krow;    /* local row index */
2466             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2467             if (work[col] < atmp) work[col] = atmp;
2468           }
2469         }
2470         bj++;
2471       }
2472     }
2473 
2474     /* send values to its owners */
2475     for (dest=rank+1; dest<size; dest++){
2476       svalues = work + rowners_bs[dest];
2477       count   = rowners_bs[dest+1]-rowners_bs[dest];
2478       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2479     }
2480   }
2481 
2482   /* receive values */
2483   if (rank){
2484     rvalues = work;
2485     count   = rowners_bs[rank+1]-rowners_bs[rank];
2486     for (source=0; source<rank; source++){
2487       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2488       /* process values */
2489       for (i=0; i<count; i++){
2490         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2491       }
2492     }
2493   }
2494 
2495   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2496   ierr = PetscFree(work);CHKERRQ(ierr);
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 #undef __FUNCT__
2501 #define __FUNCT__ "MatSOR_MPISBAIJ"
2502 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2503 {
2504   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2505   PetscErrorCode ierr;
2506   PetscInt       mbs=mat->mbs,bs=matin->rmap->bs;
2507   PetscScalar    *x,*b,*ptr,*from;
2508   Vec            bb1;
2509 
2510   PetscFunctionBegin;
2511   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2512   if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2513 
2514   if (flag == SOR_APPLY_UPPER) {
2515     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2516     PetscFunctionReturn(0);
2517   }
2518 
2519   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2520     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2521       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2522       its--;
2523     }
2524 
2525     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2526     while (its--){
2527 
2528       /* lower triangular part: slvec0b = - B^T*xx */
2529       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2530 
2531       /* copy xx into slvec0a */
2532       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2533       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2534       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2535       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2536 
2537       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2538 
2539       /* copy bb into slvec1a */
2540       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2541       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2542       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2543       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2544 
2545       /* set slvec1b = 0 */
2546       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2547 
2548       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2549       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2550       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2551       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2552 
2553       /* upper triangular part: bb1 = bb1 - B*x */
2554       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2555 
2556       /* local diagonal sweep */
2557       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2558     }
2559     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2560   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2561     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2562   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2563     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2564   } else if (flag & SOR_EISENSTAT) {
2565     Vec               xx1;
2566     PetscTruth        hasop;
2567     const PetscScalar *diag;
2568     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2569     PetscInt          i,n;
2570 
2571     if (!mat->xx1) {
2572       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2573       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2574     }
2575     xx1 = mat->xx1;
2576     bb1 = mat->bb1;
2577 
2578     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2579 
2580     if (!mat->diag) {
2581       /* this is wrong for same matrix with new nonzero values */
2582       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2583       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2584     }
2585     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2586 
2587     if (hasop) {
2588       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2589       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2590     } else {
2591       /*
2592           These two lines are replaced by code that may be a bit faster for a good compiler
2593       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2594       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2595       */
2596       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2597       ierr = VecGetArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2598       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2599       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2600       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2601       if (omega == 1.0) {
2602 	for (i=0; i<n; i++) {
2603 	  sl[i] = b[i] - diag[i]*x[i];
2604 	}
2605         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2606       } else {
2607 	for (i=0; i<n; i++) {
2608 	  sl[i] = b[i] + scale*diag[i]*x[i];
2609 	}
2610         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2611       }
2612       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2613       ierr = VecRestoreArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2614       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2615       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2616     }
2617 
2618     /* multiply off-diagonal portion of matrix */
2619     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2620     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2621     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2622     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2623     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2624     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2625     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2626     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2627     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2628     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2629 
2630     /* local sweep */
2631     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2632     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2633   } else {
2634     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2635   }
2636   PetscFunctionReturn(0);
2637 }
2638 
2639 #undef __FUNCT__
2640 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2641 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2642 {
2643   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2644   PetscErrorCode ierr;
2645   Vec            lvec1,bb1;
2646 
2647   PetscFunctionBegin;
2648   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2649   if (matin->rmap->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2650 
2651   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2652     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2653       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2654       its--;
2655     }
2656 
2657     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2658     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2659     while (its--){
2660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2661 
2662       /* lower diagonal part: bb1 = bb - B^T*xx */
2663       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2664       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2665 
2666       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2667       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2668       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2669 
2670       /* upper diagonal part: bb1 = bb1 - B*x */
2671       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2672       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2673 
2674       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2675 
2676       /* diagonal sweep */
2677       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2678     }
2679     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2680     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2681   } else {
2682     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2683   }
2684   PetscFunctionReturn(0);
2685 }
2686 
2687