xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 35e7444da1e6edf29ca585d75dc3fe3d2c63a6e4)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "../src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
54 #define CHUNKSIZE  10
55 
/* MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv):
   Insert or add a single scalar entry at (row,col) of the local diagonal
   block A, where row/col are local point (unblocked) indices.  Expects the
   caller to have the following in scope (see MatSetValues_MPISBAIJ):
   A, a, bs, bs2, aa, ai, aj, aimax, ailen, plus the scratch variables
   rp, ap, bap, brow, bcol, ridx, cidx, low, high, t, _i, ii, nrow, rmax,
   N, ierr.  Jumps to label a_noinsert once the value is placed or when a
   new nonzero is suppressed (a->nonew == 1). */
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      /* binary search narrows the candidate block columns to at most 3 */ \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      /* linear scan of the remaining candidates for an existing block */ \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          /* offset cidx*bs + ridx => column-major layout inside a block */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      /* may enlarge the storage; reloads rp/ap and friends on reallocation */ \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  /* index of the last pre-existing block in this row */ \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
94 
/* MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv):
   Identical in structure to the A variant above, but operates on the
   local off-diagonal block B (arrays ba, bi, bj, bimax, bilen, struct b)
   and uses label b_noinsert.  col here is already a local B-column index
   mapped through baij->colmap by the caller. */
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      /* binary search narrows the candidate block columns to at most 3 */ \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      N = nrow++ - 1;  /* index of the last pre-existing block in this row */ \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
132 
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored (or raises an error,
   depending on the ignore_ltriangular option).
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
/*
   Insert/add an m-by-n logically dense array of scalar values v into the
   parallel SBAIJ matrix (MatSetValues interface).  Entries in locally
   owned rows go directly into the sequential diagonal block A or the
   off-diagonal block B via the *_Private macros above; entries for rows
   owned by other processes are buffered in mat->stash and communicated
   during assembly.  Lower-triangular *blocks* are skipped (or rejected
   when a->ignore_ltriangular is false).
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;   /* ownership range in point (unblocked) indices */
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar     *ba=b->a;

  PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar     *ap,*bap;

  /* for stash: scratch buffers holding the upper-triangular subset of one row */
  PetscInt      n_loc, *in_loc = PETSC_NULL;
  MatScalar     *v_loc = PETSC_NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash){
    /* lazily grow the per-matrix stash scratch buffers to hold n entries */
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative row indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        /* comparison is in block indices: only the upper block triangle is stored */
        if (im[i]/bs > in[j]/bs){
          if (a->ignore_ltriangular){
            continue;    /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* after first assembly B uses compacted column numbering; translate
               the global block column through colmap (1-based, 0 == missing) */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* block column not present in B: revert to global numbering so
                 the new nonzero can be accommodated at the next assembly */
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private()
                 since DisAssemble replaced baij->B */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (!baij->donotstash) {
        /* copy only the upper-triangular (block) part of this row and stash it */
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
253 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
/*
   Insert/add an m-by-n array of bs-by-bs dense blocks into the parallel
   SBAIJ matrix (MatSetValuesBlocked interface); im[]/in[] are global
   *block* indices.  Each block is repacked row-major into the scratch
   buffer baij->barray before being handed to the sequential kernels.
   Blocks in rows owned by other processes go into mat->bstash.
   Lower-triangular blocks (im[i] > in[j]) are skipped or rejected
   depending on the ignore_ltriangular option.
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;    /* bs2-sized scratch block, owned by the matrix */
  PetscTruth      roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  /* NOTE(review): cstart/cend reuse the row block range (rstartbs/rendbs);
     presumably valid because SBAIJ matrices are square with identical
     row/column layouts — confirm against the type's setup code */
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if(!barray) {
    /* first call: allocate the persistent one-block scratch buffer */
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative block-row indices are skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;    /* local block row */
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;   /* block already contiguous in v */
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the bs x bs block into contiguous storage */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            /* translate the global block column through colmap (1-based,
               stored in point indices, hence the /bs below) */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* block column absent from B: fall back to global numbering */
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned elsewhere: stash the whole row of blocks */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
360 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
/*
   Retrieve an m-by-n logically dense array of values (MatGetValues
   interface).  Only rows owned by this process are supported; requested
   off-diagonal columns that are not present in B return 0.0.  Negative
   row/column indices are skipped, leaving the corresponding v[] slot
   untouched.
*/
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;   /* local point row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* diagonal block: fetch from the sequential SBAIJ part */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal: map the global block column to B's compact
             numbering through colmap (1-based; 0 means not present) */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* verify the mapped slot really corresponds to this global block
             column (garray is B's local-to-global block column map) */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
405 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
/*
   Compute a matrix norm of the parallel SBAIJ matrix.  Supports
   NORM_FROBENIUS and NORM_INFINITY/NORM_1 (equal for a symmetric
   matrix, both computed as the max column sum of absolute values).
   On one process the sequential MatNorm of A is used directly.
*/
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* local squared norms of A and B are reduced over all processes;
         B's contribution is doubled since each off-diagonal block
         represents both (i,j) and (j,i) of the symmetric matrix */
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      /* rsum accumulates local |a_ij| column sums over the full global
         width; rsum2 receives the global reduction */
      ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
      ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat: stored upper triangle; each strictly off-diagonal block value
         contributes to both its column sum and (transposed) its row sum */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);           /* global point row of this block row */
        nz = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*(rstart + *jj); jj++;    /* global point column of this block */
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat: every entry counts twice (value and its symmetric mirror) */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*garray[*jj]; jj++;       /* garray maps B's compact block col to global */
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
485 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
486 {
487   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
488   PetscErrorCode ierr;
489   PetscInt       nstash,reallocs;
490   InsertMode     addv;
491 
492   PetscFunctionBegin;
493   if (baij->donotstash) {
494     PetscFunctionReturn(0);
495   }
496 
497   /* make sure all processors are either in INSERTMODE or ADDMODE */
498   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
499   if (addv == (ADD_VALUES|INSERT_VALUES)) {
500     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
501   }
502   mat->insertmode = addv; /* in case this processor had no cache */
503 
504   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
505   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
506   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
507   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
508   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
509   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
510   PetscFunctionReturn(0);
511 }
512 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
/*
   Finish assembly: drain the scalar and block stashes started by
   MatAssemblyBegin_MPISBAIJ(), assemble the local A and B blocks,
   coordinate disassembly across processes when B's nonzero structure may
   have changed, and on the first final assembly build the scatter
   contexts needed for matrix-vector products.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash) {
    /* drain the scalar stash: each received message holds (row,col,val)
       triples sorted so that equal rows are consecutive */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stash slot is a full bs2 block, hence val+i*bs2 */
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: result is true only if every process was
       assembled, i.e. nobody disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  /* discard any cached row buffers from MatGetRow on the old structure */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}
603 
604 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
/*
   View the parallel SBAIJ matrix through an ASCII, draw, or socket
   viewer.  INFO formats print per-process statistics and return early.
   Otherwise (and for more than one process) the entire matrix is
   gathered onto process 0 as a temporary MPISBAIJ matrix and viewed
   through the singleton viewer; all processes participate so graphics
   synchronization works.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process nonzero and memory statistics, then the scatter info */
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    /* single process: view the local diagonal block directly */
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);  /* rank 0 owns everything */
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs x bs block at a time */
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;   /* map B's compact block column to global */
        for (k=0; k<bs; k++) {
          /* NOTE(review): uses the BAIJ (non-symmetric) setter here,
             presumably to bypass the lower-triangular filtering applied
             by MatSetValues_MPISBAIJ — confirm */
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatView_MPISBAIJ"
725 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
726 {
727   PetscErrorCode ierr;
728   PetscTruth     iascii,isdraw,issocket,isbinary;
729 
730   PetscFunctionBegin;
731   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
732   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
733   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
734   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
735   if (iascii || isdraw || issocket || isbinary) {
736     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
737   } else {
738     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
739   }
740   PetscFunctionReturn(0);
741 }
742 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
/*
   Frees all storage owned by an MPISBAIJ matrix: the two sequential
   blocks (diagonal A, off-diagonal B), the assembly stashes, the scatter
   contexts and work vectors, the index/work arrays, and finally the
   Mat_MPISBAIJ struct itself.  Also removes the composed functions that
   were attached when the type was created.
*/
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* stashes hold off-process entries accumulated during MatSetValues */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* lvec/Mvctx are used by the two-communication MatMult variant */
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  /* slvec0/slvec1 (and sub-vectors) are used by the default MatMult */
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
  if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
  if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  /* reset the type name and detach the functions composed at creation time */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
800 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
801 {
802   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
805   PetscScalar    *x,*from;
806 
807   PetscFunctionBegin;
808   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
809   if (nt != A->cmap->n) {
810     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
811   }
812 
813   /* diagonal part */
814   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
815   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
816 
817   /* subdiagonal part */
818   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
819 
820   /* copy x into the vec slvec0 */
821   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
822   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
823 
824   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
825   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
826   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
827 
828   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
830   /* supperdiagonal part */
831   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
832   PetscFunctionReturn(0);
833 }
834 
835 #undef __FUNCT__
836 #define __FUNCT__ "MatMult_MPISBAIJ"
837 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
838 {
839   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
840   PetscErrorCode ierr;
841   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
842   PetscScalar    *x,*from;
843 
844   PetscFunctionBegin;
845   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
846   if (nt != A->cmap->n) {
847     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
848   }
849 
850   /* diagonal part */
851   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
852   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
853 
854   /* subdiagonal part */
855   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
856 
857   /* copy x into the vec slvec0 */
858   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
859   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
860 
861   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
862   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
863   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
864 
865   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
866   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   /* supperdiagonal part */
868   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
869   PetscFunctionReturn(0);
870 }
871 
872 #undef __FUNCT__
873 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
874 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
875 {
876   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
877   PetscErrorCode ierr;
878   PetscInt       nt;
879 
880   PetscFunctionBegin;
881   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
882   if (nt != A->cmap->n) {
883     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
884   }
885   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
886   if (nt != A->rmap->N) {
887     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
888   }
889 
890   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
891   /* do diagonal part */
892   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
893   /* do supperdiagonal part */
894   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
896   /* do subdiagonal part */
897   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
898   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
899   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
900 
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
906 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
907 {
908   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
909   PetscErrorCode ierr;
910   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
911   PetscScalar    *x,*from,zero=0.0;
912 
913   PetscFunctionBegin;
914   /*
915   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
916   PetscSynchronizedFlush(((PetscObject)A)->comm);
917   */
918   /* diagonal part */
919   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
920   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
921 
922   /* subdiagonal part */
923   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
924 
925   /* copy x into the vec slvec0 */
926   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
927   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
928   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
929   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
930 
931   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
932   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
933   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
934 
935   /* supperdiagonal part */
936   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
937 
938   PetscFunctionReturn(0);
939 }
940 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
/*
   zz = yy + A*xx using the two-communication scheme: a forward scatter of
   xx feeds the superdiagonal product, and a reverse (ADD_VALUES) scatter
   of B^T*xx accumulates the subdiagonal contribution into zz.
*/
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part (overlapped with the scatter above) */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
963 
964 /*
965   This only works correctly for square matrices where the subblock A->A is the
966    diagonal block
967 */
968 #undef __FUNCT__
969 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
970 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
971 {
972   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
973   PetscErrorCode ierr;
974 
975   PetscFunctionBegin;
976   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
977   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatScale_MPISBAIJ"
983 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
984 {
985   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
990   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
991   PetscFunctionReturn(0);
992 }
993 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
/*
   Returns one locally owned row of the parallel matrix with GLOBAL column
   indices, merging the row of the diagonal block A (local numbering,
   shifted by cstart) with the row of the off-diagonal block B (compressed
   numbering mapped to global through garray).  Work arrays are allocated
   lazily, sized for the longest local row, and reused across calls; they
   are freed only at MatDestroy time.
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time */
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;  /* local row index */

  /* pass NULL for outputs the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column lies before cstart */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* layout: B-entries left of A | A-entries | B-entries right of A */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1078 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1079 {
1080   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1081 
1082   PetscFunctionBegin;
1083   if (!baij->getrowactive) {
1084     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1085   }
1086   baij->getrowactive = PETSC_FALSE;
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1092 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1093 {
1094   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1095   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1096 
1097   PetscFunctionBegin;
1098   aA->getrow_utriangular = PETSC_TRUE;
1099   PetscFunctionReturn(0);
1100 }
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1103 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1104 {
1105   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1106   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1107 
1108   PetscFunctionBegin;
1109   aA->getrow_utriangular = PETSC_FALSE;
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1115 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1116 {
1117   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1118   PetscErrorCode ierr;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1122   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1128 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1129 {
1130   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1131   PetscErrorCode ierr;
1132 
1133   PetscFunctionBegin;
1134   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1135   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1141 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1142 {
1143   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1144   PetscErrorCode ierr;
1145 
1146   PetscFunctionBegin;
1147   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1148   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1154 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1155 {
1156   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1157   Mat            A = a->A,B = a->B;
1158   PetscErrorCode ierr;
1159   PetscReal      isend[5],irecv[5];
1160 
1161   PetscFunctionBegin;
1162   info->block_size     = (PetscReal)matin->rmap->bs;
1163   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1164   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1165   isend[3] = info->memory;  isend[4] = info->mallocs;
1166   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1167   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1168   isend[3] += info->memory;  isend[4] += info->mallocs;
1169   if (flag == MAT_LOCAL) {
1170     info->nz_used      = isend[0];
1171     info->nz_allocated = isend[1];
1172     info->nz_unneeded  = isend[2];
1173     info->memory       = isend[3];
1174     info->mallocs      = isend[4];
1175   } else if (flag == MAT_GLOBAL_MAX) {
1176     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1177     info->nz_used      = irecv[0];
1178     info->nz_allocated = irecv[1];
1179     info->nz_unneeded  = irecv[2];
1180     info->memory       = irecv[3];
1181     info->mallocs      = irecv[4];
1182   } else if (flag == MAT_GLOBAL_SUM) {
1183     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1184     info->nz_used      = irecv[0];
1185     info->nz_allocated = irecv[1];
1186     info->nz_unneeded  = irecv[2];
1187     info->memory       = irecv[3];
1188     info->mallocs      = irecv[4];
1189   } else {
1190     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1191   }
1192   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1193   info->fill_ratio_needed = 0;
1194   info->factor_mallocs    = 0;
1195   PetscFunctionReturn(0);
1196 }
1197 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
/*
   Sets an option on the parallel SBAIJ matrix, forwarding to the local
   blocks where the option concerns them.  Symmetry/triangular-access
   options only touch the sequential diagonal block a->A.
*/
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* pattern-related options apply to both local blocks */
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    /* NOTE(review): the Hermitian mult kernel is installed regardless of
       flg's value -- confirm this is intended */
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): writes the same ignore_ltriangular field as
       MAT_IGNORE_LOWER_TRIANGULAR; an error-specific flag looks intended
       -- confirm against Mat_SeqSBAIJ */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1262 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1263 {
1264   PetscErrorCode ierr;
1265   PetscFunctionBegin;
1266   if (MAT_INITIAL_MATRIX || *B != A) {
1267     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1268   }
1269   PetscFunctionReturn(0);
1270 }
1271 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
/*
   mat = diag(ll) * mat * diag(rr).  For the symmetric format the left and
   right scaling vectors must be equal (otherwise symmetry would be lost).
   rr is scattered through Mvctx to obtain the off-process entries needed
   to right-scale the off-diagonal block; the scatter is overlapped with
   the local scaling work.
*/
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  /* different Vec objects are allowed only if their contents are equal */
  if (ll != rr){
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg)
      SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  /* start communicating rr's off-process entries early */
  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1312 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1313 {
1314   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1315   PetscErrorCode ierr;
1316 
1317   PetscFunctionBegin;
1318   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatEqual_MPISBAIJ"
1326 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1327 {
1328   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1329   Mat            a,b,c,d;
1330   PetscTruth     flg;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   a = matA->A; b = matA->B;
1335   c = matB->A; d = matB->B;
1336 
1337   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1338   if (flg) {
1339     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1340   }
1341   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatCopy_MPISBAIJ"
1347 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1348 {
1349   PetscErrorCode ierr;
1350   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1351   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1352 
1353   PetscFunctionBegin;
1354   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1355   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1356     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1357     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1358     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1359   } else {
1360     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1361     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1362   }
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1368 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1369 {
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
/*
   Y = Y + a*X.  When the nonzero patterns match exactly, the update is
   performed directly on the stored value arrays with BLAS axpy (diagonal
   and off-diagonal blocks separately); otherwise the generic fallback is
   used, bracketed by the upper-triangular getrow calls the symmetric
   format requires.
*/
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal blocks: axpy over the raw value arrays */
    xa = (Mat_SeqSBAIJ *)xx->A->data;
    ya = (Mat_SeqSBAIJ *)yy->A->data;
    bnz = PetscBLASIntCast(xa->nz);
    BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
    /* off-diagonal blocks */
    xb = (Mat_SeqBAIJ *)xx->B->data;
    yb = (Mat_SeqBAIJ *)yy->B->data;
    bnz = PetscBLASIntCast(xb->nz);
    BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
  } else {
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1409 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1410 {
1411   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1412   PetscInt        rbs,cbs;
1413   PetscErrorCode  ierr;
1414 
1415   PetscFunctionBegin;
1416   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1417   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1418   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1419   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1420   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs);
1421   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs);
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1427 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1428 {
1429   PetscErrorCode ierr;
1430   PetscInt       i;
1431   PetscTruth     flg;
1432 
1433   PetscFunctionBegin;
1434   for (i=0; i<n; i++) {
1435     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1436     if (!flg) {
1437       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1438     }
1439   }
1440   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 
1445 /* -------------------------------------------------------------------*/
/*
   Operations table for MATMPISBAIJ.  The initializers are positional and
   must stay aligned with struct _MatOps; the numeric comments mark slot
   indices.  A 0 entry means the operation is not implemented for this
   matrix type.
*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPISBAIJ,
       MatGetRow_MPISBAIJ,
       MatRestoreRow_MPISBAIJ,
       MatMult_MPISBAIJ,
/* 4*/ MatMultAdd_MPISBAIJ,
       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
       MatMultAdd_MPISBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPISBAIJ,
       MatTranspose_MPISBAIJ,
/*15*/ MatGetInfo_MPISBAIJ,
       MatEqual_MPISBAIJ,
       MatGetDiagonal_MPISBAIJ,
       MatDiagonalScale_MPISBAIJ,
       MatNorm_MPISBAIJ,
/*20*/ MatAssemblyBegin_MPISBAIJ,
       MatAssemblyEnd_MPISBAIJ,
       MatSetOption_MPISBAIJ,
       MatZeroEntries_MPISBAIJ,
/*24*/ 0,
       0,
       0,
       0,
       0,
/*29*/ MatSetUpPreallocation_MPISBAIJ,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_MPISBAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPISBAIJ,
       MatGetSubMatrices_MPISBAIJ,
       MatIncreaseOverlap_MPISBAIJ,
       MatGetValues_MPISBAIJ,
       MatCopy_MPISBAIJ,
/*44*/ 0,
       MatScale_MPISBAIJ,
       0,
       0,
       0,
/*49*/ MatSetBlockSize_MPISBAIJ,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       MatSetUnfactored_MPISBAIJ,
       0,
       MatSetValuesBlocked_MPISBAIJ,
/*59*/ 0,
       0,
       0,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPISBAIJ,
       0,
       0,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_MPISBAIJ,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       0,
       0,
       0,
/*94*/ 0,
       0,
       0,
       0,
       0,
/*99*/ 0,
       0,
       0,
       0,
       0,
/*104*/0,
       MatRealPart_MPISBAIJ,
       MatImaginaryPart_MPISBAIJ,
       MatGetRowUpperTriangular_MPISBAIJ,
       MatRestoreRowUpperTriangular_MPISBAIJ
};
1557 
1558 
1559 EXTERN_C_BEGIN
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1562 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1563 {
1564   PetscFunctionBegin;
1565   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1566   *iscopy = PETSC_FALSE;
1567   PetscFunctionReturn(0);
1568 }
1569 EXTERN_C_END
1570 
1571 EXTERN_C_BEGIN
1572 #undef __FUNCT__
1573 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1574 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1575 {
1576   Mat_MPISBAIJ   *b;
1577   PetscErrorCode ierr;
1578   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1579 
1580   PetscFunctionBegin;
1581   if (bs < 0){
1582     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1583       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1584     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1585     bs   = PetscAbs(bs);
1586   }
1587   if ((d_nnz || o_nnz) && newbs != bs) {
1588     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1589   }
1590   bs = newbs;
1591 
1592   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1593   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1594   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1595   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1596 
1597   B->rmap->bs = B->cmap->bs = bs;
1598   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1599   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1600 
1601   if (d_nnz) {
1602     for (i=0; i<B->rmap->n/bs; i++) {
1603       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1604     }
1605   }
1606   if (o_nnz) {
1607     for (i=0; i<B->rmap->n/bs; i++) {
1608       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1609     }
1610   }
1611 
1612   b   = (Mat_MPISBAIJ*)B->data;
1613   mbs = B->rmap->n/bs;
1614   Mbs = B->rmap->N/bs;
1615   if (mbs*bs != B->rmap->n) {
1616     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1617   }
1618 
1619   B->rmap->bs  = bs;
1620   b->bs2 = bs*bs;
1621   b->mbs = mbs;
1622   b->nbs = mbs;
1623   b->Mbs = Mbs;
1624   b->Nbs = Mbs;
1625 
1626   for (i=0; i<=b->size; i++) {
1627     b->rangebs[i] = B->rmap->range[i]/bs;
1628   }
1629   b->rstartbs = B->rmap->rstart/bs;
1630   b->rendbs   = B->rmap->rend/bs;
1631 
1632   b->cstartbs = B->cmap->rstart/bs;
1633   b->cendbs   = B->cmap->rend/bs;
1634 
1635   if (!B->preallocated) {
1636     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1637     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1638     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1639     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1640     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1641     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1642     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1643     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1644     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1645   }
1646 
1647   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1648   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1649   B->preallocated = PETSC_TRUE;
1650   PetscFunctionReturn(0);
1651 }
1652 EXTERN_C_END
1653 
1654 EXTERN_C_BEGIN
1655 #if defined(PETSC_HAVE_MUMPS)
1656 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_mumps(Mat,MatFactorType,Mat*);
1657 #endif
1658 #if defined(PETSC_HAVE_SPOOLES)
1659 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1660 #endif
1661 #if defined(PETSC_HAVE_PASTIX)
1662 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1663 #endif
1664 EXTERN_C_END
1665 
1666 /*MC
1667    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1668    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1669 
1670    Options Database Keys:
1671 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1672 
1673   Level: beginner
1674 
1675 .seealso: MatCreateMPISBAIJ
1676 M*/
1677 
1678 EXTERN_C_BEGIN
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatCreate_MPISBAIJ"
/*
   MatCreate_MPISBAIJ - Type constructor for MATMPISBAIJ: allocates the
   type-specific data, installs the function table, initializes all cached
   state to empty, and composes the string-dispatched operations.  Creation
   of the local A/B submatrices is deferred to
   MatMPISBAIJSetPreallocation_MPISBAIJ().
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;

  /* allocate the type-specific data and install the shared ops table */
  ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy    = MatDestroy_MPISBAIJ;
  B->ops->view       = MatView_MPISBAIJ;
  B->mapping         = 0;
  B->assembled       = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;
  b->slvec0       = 0;
  b->slvec0b      = 0;
  b->slvec1       = 0;
  b->slvec1a      = 0;
  b->slvec1b      = 0;
  b->sMvctx       = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  b->in_loc       = 0;
  b->v_loc        = 0;
  b->n_loc        = 0;
  /* -mat_use_hash_table is queried twice: first as an on/off flag, then
     (when set) again as a real value supplying the hash-table factor */
  ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) {
      PetscReal fact = 1.39;
      ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
      /* factors <= 1.0 would make the table too small; fall back to default */
      if (fact <= 1.0) fact = 1.39;
      ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
      ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
    }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  /* compose the type-specific operations looked up by string name;
     external-solver factorizations are only registered when configured in */
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
					   "MatGetFactor_mpisbaij_pastix",
					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                     "MatGetFactor_mpisbaij_mumps",
                                     MatGetFactor_mpisbaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                     "MatGetFactor_mpisbaij_spooles",
                                     MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPISBAIJ",
                                     MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPISBAIJ",
                                     MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPISBAIJ",
                                     MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
                                     "MatMPISBAIJSetPreallocation_MPISBAIJ",
                                     MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  /* SBAIJ stores only the upper triangle, so symmetry is known a priori */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1788 EXTERN_C_END
1789 
1790 /*MC
1791    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1792 
1793    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1794    and MATMPISBAIJ otherwise.
1795 
1796    Options Database Keys:
1797 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1798 
1799   Level: beginner
1800 
1801 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1802 M*/
1803 
1804 EXTERN_C_BEGIN
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatCreate_SBAIJ"
1807 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1808 {
1809   PetscErrorCode ierr;
1810   PetscMPIInt    size;
1811 
1812   PetscFunctionBegin;
1813   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1814   if (size == 1) {
1815     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1816   } else {
1817     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1818   }
1819   PetscFunctionReturn(0);
1820 }
1821 EXTERN_C_END
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1825 /*@C
1826    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1827    the user should preallocate the matrix storage by setting the parameters
1828    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1829    performance can be increased by more than a factor of 50.
1830 
1831    Collective on Mat
1832 
1833    Input Parameters:
1834 +  A - the matrix
.  bs   - size of block
1836 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1837            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or PETSC_NULL.  You must leave
           room for the diagonal entry even if it is zero.
1842 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1843            submatrix (same for all local rows).
1844 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1845            off-diagonal portion of the local submatrix (possibly different for
1846            each block row) or PETSC_NULL.
1847 
1848 
1849    Options Database Keys:
1850 .   -mat_no_unroll - uses code that does not unroll the loops in the
1851                      block calculations (much slower)
1852 .   -mat_block_size - size of the blocks to use
1853 
1854    Notes:
1855 
1856    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1857    than it must be used on all processors that share the object for that argument.
1858 
1859    If the *_nnz parameter is given then the *_nz parameter is ignored
1860 
1861    Storage Information:
1862    For a square global matrix we define each processor's diagonal portion
1863    to be its local rows and the corresponding columns (a square submatrix);
1864    each processor's off-diagonal portion encompasses the remainder of the
1865    local matrix (a rectangular submatrix).
1866 
1867    The user can specify preallocated storage for the diagonal part of
1868    the local submatrix with either d_nz or d_nnz (not both).  Set
1869    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1870    memory allocation.  Likewise, specify preallocated storage for the
1871    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1872 
1873    You can call MatGetInfo() to get information on how effective the preallocation was;
1874    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1875    You can also run with the option -info and look for messages with the string
1876    malloc in them to see if additional memory allocation was needed.
1877 
1878    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1879    the figure below we depict these three local rows and all columns (0-11).
1880 
1881 .vb
1882            0 1 2 3 4 5 6 7 8 9 10 11
1883           -------------------
1884    row 3  |  o o o d d d o o o o o o
1885    row 4  |  o o o d d d o o o o o o
1886    row 5  |  o o o d d d o o o o o o
1887           -------------------
1888 .ve
1889 
1890    Thus, any entries in the d locations are stored in the d (diagonal)
1891    submatrix, and any entries in the o locations are stored in the
1892    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1893    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1894 
1895    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1896    plus the diagonal part of the d matrix,
1897    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1898    In general, for PDE problems in which most nonzeros are near the diagonal,
1899    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1900    or you will get TERRIBLE performance; see the users' manual chapter on
1901    matrices.
1902 
1903    Level: intermediate
1904 
1905 .keywords: matrix, block, aij, compressed row, sparse, parallel
1906 
1907 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1908 @*/
1909 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1910 {
1911   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1912 
1913   PetscFunctionBegin;
1914   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1915   if (f) {
1916     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1917   }
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatCreateMPISBAIJ"
1923 /*@C
1924    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1925    (block compressed row).  For good matrix assembly performance
1926    the user should preallocate the matrix storage by setting the parameters
1927    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1928    performance can be increased by more than a factor of 50.
1929 
1930    Collective on MPI_Comm
1931 
1932    Input Parameters:
1933 +  comm - MPI communicator
.  bs   - size of block
1935 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1936            This value should be the same as the local size used in creating the
1937            y vector for the matrix-vector product y = Ax.
1938 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1939            This value should be the same as the local size used in creating the
1940            x vector for the matrix-vector product y = Ax.
1941 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1942 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1943 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1944            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row) or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
1949 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1950            submatrix (same for all local rows).
1951 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1952            off-diagonal portion of the local submatrix (possibly different for
1953            each block row) or PETSC_NULL.
1954 
1955    Output Parameter:
1956 .  A - the matrix
1957 
1958    Options Database Keys:
1959 .   -mat_no_unroll - uses code that does not unroll the loops in the
1960                      block calculations (much slower)
1961 .   -mat_block_size - size of the blocks to use
1962 .   -mat_mpi - use the parallel matrix data structures even on one processor
1963                (defaults to using SeqBAIJ format on one processor)
1964 
1965    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
1967    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1968 
1969    Notes:
1970    The number of rows and columns must be divisible by blocksize.
1971    This matrix type does not support complex Hermitian operation.
1972 
1973    The user MUST specify either the local or global matrix dimensions
1974    (possibly both).
1975 
1976    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1977    than it must be used on all processors that share the object for that argument.
1978 
1979    If the *_nnz parameter is given then the *_nz parameter is ignored
1980 
1981    Storage Information:
1982    For a square global matrix we define each processor's diagonal portion
1983    to be its local rows and the corresponding columns (a square submatrix);
1984    each processor's off-diagonal portion encompasses the remainder of the
1985    local matrix (a rectangular submatrix).
1986 
1987    The user can specify preallocated storage for the diagonal part of
1988    the local submatrix with either d_nz or d_nnz (not both).  Set
1989    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1990    memory allocation.  Likewise, specify preallocated storage for the
1991    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1992 
1993    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1994    the figure below we depict these three local rows and all columns (0-11).
1995 
1996 .vb
1997            0 1 2 3 4 5 6 7 8 9 10 11
1998           -------------------
1999    row 3  |  o o o d d d o o o o o o
2000    row 4  |  o o o d d d o o o o o o
2001    row 5  |  o o o d d d o o o o o o
2002           -------------------
2003 .ve
2004 
2005    Thus, any entries in the d locations are stored in the d (diagonal)
2006    submatrix, and any entries in the o locations are stored in the
2007    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2008    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2009 
2010    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2011    plus the diagonal part of the d matrix,
2012    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2013    In general, for PDE problems in which most nonzeros are near the diagonal,
2014    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2015    or you will get TERRIBLE performance; see the users' manual chapter on
2016    matrices.
2017 
2018    Level: intermediate
2019 
2020 .keywords: matrix, block, aij, compressed row, sparse, parallel
2021 
2022 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2023 @*/
2024 
2025 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2026 {
2027   PetscErrorCode ierr;
2028   PetscMPIInt    size;
2029 
2030   PetscFunctionBegin;
2031   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2032   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2033   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2034   if (size > 1) {
2035     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2036     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2037   } else {
2038     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2039     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2040   }
2041   PetscFunctionReturn(0);
2042 }
2043 
2044 
2045 #undef __FUNCT__
2046 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
/*
   MatDuplicate_MPISBAIJ - Duplicates an assembled MPISBAIJ matrix,
   copying the layouts, bookkeeping, scatter contexts, and (depending on
   cpvalues) the numerical values of the local A and B submatrices.
   The slvec1a/slvec1b (and slvec0b) work vectors are NOT duplicated
   independently: they are rebuilt as views aliasing the arrays of the
   duplicated slvec1/slvec0, mirroring how the original was set up.
*/
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* the duplicate is marked preallocated/assembled up front; the data
     structures below are filled in to match */
  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the block-index bookkeeping */
  a = (Mat_MPISBAIJ*)mat->data;
  a->bs2   = oldmat->bs2;
  a->mbs   = oldmat->mbs;
  a->nbs   = oldmat->nbs;
  a->Mbs   = oldmat->Mbs;
  a->Nbs   = oldmat->Nbs;


  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-instance caches start out empty in the duplicate */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs    = oldmat->rstartbs;
  a->rendbs      = oldmat->rendbs;
  a->cstartbs    = oldmat->cstartbs;
  a->cendbs      = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* copy the global indices of the off-diagonal block columns, if present */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);

  /* slvec1a is a view of the first bs*mbs entries of slvec1's array and
     slvec1b of the remainder; slvec0b likewise views the tail of slvec0 */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  /* NOTE(review): slvec0 and slvec1 were already logged as children above;
     these two calls appear redundant - confirm before removing */
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);

  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2154 
2155 #undef __FUNCT__
2156 #define __FUNCT__ "MatLoad_MPISBAIJ"
2157 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2158 {
2159   Mat            A;
2160   PetscErrorCode ierr;
2161   PetscInt       i,nz,j,rstart,rend;
2162   PetscScalar    *vals,*buf;
2163   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2164   MPI_Status     status;
2165   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2166   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2167   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2168   PetscInt       bs=1,Mbs,mbs,extra_rows;
2169   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2170   PetscInt       dcount,kmax,k,nzcount,tmp;
2171   int            fd;
2172 
2173   PetscFunctionBegin;
2174   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2175     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2176   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2177 
2178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2179   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2180   if (!rank) {
2181     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2182     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2183     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2184     if (header[3] < 0) {
2185       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2186     }
2187   }
2188 
2189   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2190   M = header[1]; N = header[2];
2191 
2192   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2193 
2194   /*
2195      This code adds extra rows to make sure the number of rows is
2196      divisible by the blocksize
2197   */
2198   Mbs        = M/bs;
2199   extra_rows = bs - M + bs*(Mbs);
2200   if (extra_rows == bs) extra_rows = 0;
2201   else                  Mbs++;
2202   if (extra_rows &&!rank) {
2203     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2204   }
2205 
2206   /* determine ownership of all rows */
2207   mbs        = Mbs/size + ((Mbs % size) > rank);
2208   m          = mbs*bs;
2209   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2210   mmbs       = PetscMPIIntCast(mbs);
2211   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2212   rowners[0] = 0;
2213   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2214   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2215   rstart = rowners[rank];
2216   rend   = rowners[rank+1];
2217 
2218   /* distribute row lengths to all processors */
2219   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2220   if (!rank) {
2221     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2222     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2223     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2224     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2225     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2226     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2227     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2228   } else {
2229     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2230   }
2231 
2232   if (!rank) {   /* procs[0] */
2233     /* calculate the number of nonzeros on each processor */
2234     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2235     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2236     for (i=0; i<size; i++) {
2237       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2238         procsnz[i] += rowlengths[j];
2239       }
2240     }
2241     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2242 
2243     /* determine max buffer needed and allocate it */
2244     maxnz = 0;
2245     for (i=0; i<size; i++) {
2246       maxnz = PetscMax(maxnz,procsnz[i]);
2247     }
2248     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2249 
2250     /* read in my part of the matrix column indices  */
2251     nz     = procsnz[0];
2252     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2253     mycols = ibuf;
2254     if (size == 1)  nz -= extra_rows;
2255     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2256     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2257 
2258     /* read in every ones (except the last) and ship off */
2259     for (i=1; i<size-1; i++) {
2260       nz   = procsnz[i];
2261       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2262       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2263     }
2264     /* read in the stuff for the last proc */
2265     if (size != 1) {
2266       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2267       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2268       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2269       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2270     }
2271     ierr = PetscFree(cols);CHKERRQ(ierr);
2272   } else {  /* procs[i], i>0 */
2273     /* determine buffer space needed for message */
2274     nz = 0;
2275     for (i=0; i<m; i++) {
2276       nz += locrowlens[i];
2277     }
2278     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2279     mycols = ibuf;
2280     /* receive message of column indices*/
2281     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2282     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2283     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2284   }
2285 
2286   /* loop over local rows, determining number of off diagonal entries */
2287   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2288   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2289   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2290   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2291   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2292   rowcount = 0;
2293   nzcount  = 0;
2294   for (i=0; i<mbs; i++) {
2295     dcount  = 0;
2296     odcount = 0;
2297     for (j=0; j<bs; j++) {
2298       kmax = locrowlens[rowcount];
2299       for (k=0; k<kmax; k++) {
2300         tmp = mycols[nzcount++]/bs; /* block col. index */
2301         if (!mask[tmp]) {
2302           mask[tmp] = 1;
2303           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2304           else masked1[dcount++] = tmp; /* entry in diag portion */
2305         }
2306       }
2307       rowcount++;
2308     }
2309 
2310     dlens[i]  = dcount;  /* d_nzz[i] */
2311     odlens[i] = odcount; /* o_nzz[i] */
2312 
2313     /* zero out the mask elements we set */
2314     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2315     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2316   }
2317 
2318   /* create our matrix */
2319   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2320   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2321   ierr = MatSetType(A,type);CHKERRQ(ierr);
2322   ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2323   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2324 
2325   if (!rank) {
2326     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2327     /* read in my part of the matrix numerical values  */
2328     nz = procsnz[0];
2329     vals = buf;
2330     mycols = ibuf;
2331     if (size == 1)  nz -= extra_rows;
2332     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2333     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2334 
2335     /* insert into matrix */
2336     jj      = rstart*bs;
2337     for (i=0; i<m; i++) {
2338       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2339       mycols += locrowlens[i];
2340       vals   += locrowlens[i];
2341       jj++;
2342     }
2343 
2344     /* read in other processors (except the last one) and ship out */
2345     for (i=1; i<size-1; i++) {
2346       nz   = procsnz[i];
2347       vals = buf;
2348       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2349       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2350     }
2351     /* the last proc */
2352     if (size != 1){
2353       nz   = procsnz[i] - extra_rows;
2354       vals = buf;
2355       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2356       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2357       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2358     }
2359     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2360 
2361   } else {
2362     /* receive numeric values */
2363     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2364 
2365     /* receive message of values*/
2366     vals   = buf;
2367     mycols = ibuf;
2368     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2369     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2370     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2371 
2372     /* insert into matrix */
2373     jj      = rstart*bs;
2374     for (i=0; i<m; i++) {
2375       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2376       mycols += locrowlens[i];
2377       vals   += locrowlens[i];
2378       jj++;
2379     }
2380   }
2381 
2382   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2383   ierr = PetscFree(buf);CHKERRQ(ierr);
2384   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2385   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2386   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2387   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2388   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2389   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2390   *newmat = A;
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2396 /*XXXXX@
2397    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2398 
   Input Parameters:
+  mat  - the matrix
-  fact - factor
2402 
2403    Collective on Mat
2404 
2405    Level: advanced
2406 
2407   Notes:
2408    This can also be set by the command line option: -mat_use_hash_table fact
2409 
2410 .keywords: matrix, hashtable, factor, HT
2411 
2412 .seealso: MatSetOption()
2413 @XXXXX*/
2414 
2415 
2416 #undef __FUNCT__
2417 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2418 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2419 {
2420   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2421   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2422   PetscReal      atmp;
2423   PetscReal      *work,*svalues,*rvalues;
2424   PetscErrorCode ierr;
2425   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2426   PetscMPIInt    rank,size;
2427   PetscInt       *rowners_bs,dest,count,source;
2428   PetscScalar    *va;
2429   MatScalar      *ba;
2430   MPI_Status     stat;
2431 
2432   PetscFunctionBegin;
2433   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2434   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2435   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2436 
2437   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2438   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2439 
2440   bs   = A->rmap->bs;
2441   mbs  = a->mbs;
2442   Mbs  = a->Mbs;
2443   ba   = b->a;
2444   bi   = b->i;
2445   bj   = b->j;
2446 
2447   /* find ownerships */
2448   rowners_bs = A->rmap->range;
2449 
2450   /* each proc creates an array to be distributed */
2451   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2452   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2453 
2454   /* row_max for B */
2455   if (rank != size-1){
2456     for (i=0; i<mbs; i++) {
2457       ncols = bi[1] - bi[0]; bi++;
2458       brow  = bs*i;
2459       for (j=0; j<ncols; j++){
2460         bcol = bs*(*bj);
2461         for (kcol=0; kcol<bs; kcol++){
2462           col = bcol + kcol;                 /* local col index */
2463           col += rowners_bs[rank+1];      /* global col index */
2464           for (krow=0; krow<bs; krow++){
2465             atmp = PetscAbsScalar(*ba); ba++;
2466             row = brow + krow;    /* local row index */
2467             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2468             if (work[col] < atmp) work[col] = atmp;
2469           }
2470         }
2471         bj++;
2472       }
2473     }
2474 
2475     /* send values to its owners */
2476     for (dest=rank+1; dest<size; dest++){
2477       svalues = work + rowners_bs[dest];
2478       count   = rowners_bs[dest+1]-rowners_bs[dest];
2479       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2480     }
2481   }
2482 
2483   /* receive values */
2484   if (rank){
2485     rvalues = work;
2486     count   = rowners_bs[rank+1]-rowners_bs[rank];
2487     for (source=0; source<rank; source++){
2488       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2489       /* process values */
2490       for (i=0; i<count; i++){
2491         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2492       }
2493     }
2494   }
2495 
2496   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2497   ierr = PetscFree(work);CHKERRQ(ierr);
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 #undef __FUNCT__
2502 #define __FUNCT__ "MatSOR_MPISBAIJ"
2503 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2504 {
2505   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2506   PetscErrorCode ierr;
2507   PetscInt       mbs=mat->mbs,bs=matin->rmap->bs;
2508   PetscScalar    *x,*b,*ptr,*from;
2509   Vec            bb1;
2510 
2511   PetscFunctionBegin;
2512   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2513   if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2514 
2515   if (flag == SOR_APPLY_UPPER) {
2516     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2517     PetscFunctionReturn(0);
2518   }
2519 
2520   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2521     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2522       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2523       its--;
2524     }
2525 
2526     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2527     while (its--){
2528 
2529       /* lower triangular part: slvec0b = - B^T*xx */
2530       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2531 
2532       /* copy xx into slvec0a */
2533       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2534       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2535       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2536       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2537 
2538       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2539 
2540       /* copy bb into slvec1a */
2541       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2542       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2543       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2544       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2545 
2546       /* set slvec1b = 0 */
2547       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2548 
2549       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2550       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2551       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2552       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2553 
2554       /* upper triangular part: bb1 = bb1 - B*x */
2555       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2556 
2557       /* local diagonal sweep */
2558       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2559     }
2560     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2561   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2562     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2563   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2564     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2565   } else if (flag & SOR_EISENSTAT) {
2566     Vec               xx1;
2567     PetscTruth        hasop;
2568     const PetscScalar *diag;
2569     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2570     PetscInt          i,n;
2571 
2572     if (!mat->xx1) {
2573       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2574       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2575     }
2576     xx1 = mat->xx1;
2577     bb1 = mat->bb1;
2578 
2579     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2580 
2581     if (!mat->diag) {
2582       /* this is wrong for same matrix with new nonzero values */
2583       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2584       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2585     }
2586     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2587 
2588     if (hasop) {
2589       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2590       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2591     } else {
2592       /*
2593           These two lines are replaced by code that may be a bit faster for a good compiler
2594       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2595       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2596       */
2597       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2598       ierr = VecGetArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2599       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2600       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2601       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2602       if (omega == 1.0) {
2603 	for (i=0; i<n; i++) {
2604 	  sl[i] = b[i] - diag[i]*x[i];
2605 	}
2606         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2607       } else {
2608 	for (i=0; i<n; i++) {
2609 	  sl[i] = b[i] + scale*diag[i]*x[i];
2610 	}
2611         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2612       }
2613       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2614       ierr = VecRestoreArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2615       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2616       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2617     }
2618 
2619     /* multiply off-diagonal portion of matrix */
2620     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2621     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2622     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2623     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2624     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2625     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2626     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2627     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2628     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2629     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2630 
2631     /* local sweep */
2632     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2633     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2634   } else {
2635     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2636   }
2637   PetscFunctionReturn(0);
2638 }
2639 
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
/*
  MatSOR_MPISBAIJ_2comm - variant of MatSOR_MPISBAIJ that uses two separate
  scatters on Mvctx (forward for the B*x ghost values, reverse to accumulate
  the B^T*x contribution into the right-hand side) instead of the combined
  slvec0/slvec1 scheme.  Only block size 1 and SOR_LOCAL_SYMMETRIC_SWEEP are
  supported.  Communication is overlapped with local computation, so the
  ScatterBegin/compute/ScatterEnd ordering below is load-bearing.
*/
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;   /* lvec1: ghost-sized work vector; bb1: modified rhs */

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if ( flag & SOR_ZERO_INITIAL_GUESS ) {
      /* x==0 on the first iteration: no off-process contribution needed */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--){
      /* start gathering ghost values of xx into mat->lvec */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx (computed while the forward
         scatter is in flight; the minus sign is applied here) */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      /* reverse scatter-adds -B^T*xx into the owners' bb1 */
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x; the ghost vector is negated
         in place (it is re-filled with INSERT_VALUES next iteration) */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep with the fully assembled right-hand side */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}
2687 
2688