xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision b24ad0428a60bb3d8d9be05f5b432634d28bad96)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
3 #include "mpisbaij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
8 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
10 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
11 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
13 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
16 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
26    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
27    converts the entries into single precision and then calls ..._MatScalar() to put them
28    into the single precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (PetscInt*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
153 #ifndef MatSetValues_SeqBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         PetscInt       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (PetscInt*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
237   PetscErrorCode ierr;
238   PetscInt       i,N = m*n;
239   MatScalar      *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   PetscInt       i,N = m*n*b->bs2;
263   MatScalar      *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
281 PetscErrorCode MatSetValues_MPISBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
282 {
283   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
284   PetscErrorCode ierr;
285   PetscInt       i,N = m*n;
286   MatScalar      *vsingle;
287 
288   PetscFunctionBegin;
289   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
290   /* PetscFunctionReturn(0); */
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
295 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
296 {
297   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
298   PetscErrorCode ierr;
299   PetscInt       i,N = m*n*b->bs2;
300   MatScalar      *vsingle;
301 
302   PetscFunctionBegin;
303   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
304   /* PetscFunctionReturn(0); */
305 }
306 #endif
307 
308 /* Only add/insert a(i,j) with i<=j (blocks).
309    Any a(i,j) with i>j input by user is ingored.
310 */
311 #undef __FUNCT__
312 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
313 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
314 {
315   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
316   MatScalar      value;
317   PetscTruth     roworiented = baij->roworiented;
318   PetscErrorCode ierr;
319   PetscInt       i,j,row,col;
320   PetscInt       rstart_orig=baij->rstart_bs;
321   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
322   PetscInt       cend_orig=baij->cend_bs,bs=baij->bs;
323 
324   /* Some Variables required in the macro */
325   Mat            A = baij->A;
326   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
327   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
328   MatScalar      *aa=a->a;
329 
330   Mat            B = baij->B;
331   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
332   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
333   MatScalar     *ba=b->a;
334 
335   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
336   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
337   MatScalar     *ap,*bap;
338 
339   /* for stash */
340   PetscInt      n_loc, *in_loc=0;
341   MatScalar     *v_loc=0;
342 
343   PetscFunctionBegin;
344 
345   if(!baij->donotstash){
346     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
347     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
348   }
349 
350   for (i=0; i<m; i++) {
351     if (im[i] < 0) continue;
352 #if defined(PETSC_USE_BOPT_g)
353     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
354 #endif
355     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
356       row = im[i] - rstart_orig;              /* local row index */
357       for (j=0; j<n; j++) {
358         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
359         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
360           col = in[j] - cstart_orig;          /* local col index */
361           brow = row/bs; bcol = col/bs;
362           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
363           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
364           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
365           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
366         } else if (in[j] < 0) continue;
367 #if defined(PETSC_USE_BOPT_g)
368         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
369 #endif
370         else {  /* off-diag entry (B) */
371           if (mat->was_assembled) {
372             if (!baij->colmap) {
373               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
374             }
375 #if defined (PETSC_USE_CTABLE)
376             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
377             col  = col - 1;
378 #else
379             col = baij->colmap[in[j]/bs] - 1;
380 #endif
381             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
382               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
383               col =  in[j];
384               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
385               B = baij->B;
386               b = (Mat_SeqBAIJ*)(B)->data;
387               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
388               ba=b->a;
389             } else col += in[j]%bs;
390           } else col = in[j];
391           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
392           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
393           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
394         }
395       }
396     } else {  /* off processor entry */
397       if (!baij->donotstash) {
398         n_loc = 0;
399         for (j=0; j<n; j++){
400           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
401           in_loc[n_loc] = in[j];
402           if (roworiented) {
403             v_loc[n_loc] = v[i*n+j];
404           } else {
405             v_loc[n_loc] = v[j*m+i];
406           }
407           n_loc++;
408         }
409         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
410       }
411     }
412   }
413 
414   if(!baij->donotstash){
415     ierr = PetscFree(in_loc);CHKERRQ(ierr);
416     ierr = PetscFree(v_loc);CHKERRQ(ierr);
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
423 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
424 {
425   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
426   const MatScalar *value;
427   MatScalar       *barray=baij->barray;
428   PetscTruth      roworiented = baij->roworiented;
429   PetscErrorCode  ierr;
430   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
431   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
432   PetscInt        cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
433 
434   PetscFunctionBegin;
435   if(!barray) {
436     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
437     baij->barray = barray;
438   }
439 
440   if (roworiented) {
441     stepval = (n-1)*bs;
442   } else {
443     stepval = (m-1)*bs;
444   }
445   for (i=0; i<m; i++) {
446     if (im[i] < 0) continue;
447 #if defined(PETSC_USE_BOPT_g)
448     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
449 #endif
450     if (im[i] >= rstart && im[i] < rend) {
451       row = im[i] - rstart;
452       for (j=0; j<n; j++) {
453         /* If NumCol = 1 then a copy is not required */
454         if ((roworiented) && (n == 1)) {
455           barray = (MatScalar*) v + i*bs2;
456         } else if((!roworiented) && (m == 1)) {
457           barray = (MatScalar*) v + j*bs2;
458         } else { /* Here a copy is required */
459           if (roworiented) {
460             value = v + i*(stepval+bs)*bs + j*bs;
461           } else {
462             value = v + j*(stepval+bs)*bs + i*bs;
463           }
464           for (ii=0; ii<bs; ii++,value+=stepval) {
465             for (jj=0; jj<bs; jj++) {
466               *barray++  = *value++;
467             }
468           }
469           barray -=bs2;
470         }
471 
472         if (in[j] >= cstart && in[j] < cend){
473           col  = in[j] - cstart;
474           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
475         }
476         else if (in[j] < 0) continue;
477 #if defined(PETSC_USE_BOPT_g)
478         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
479 #endif
480         else {
481           if (mat->was_assembled) {
482             if (!baij->colmap) {
483               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
484             }
485 
486 #if defined(PETSC_USE_BOPT_g)
487 #if defined (PETSC_USE_CTABLE)
488             { PetscInt data;
489               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
490               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
491             }
492 #else
493             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
494 #endif
495 #endif
496 #if defined (PETSC_USE_CTABLE)
497 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
498             col  = (col - 1)/bs;
499 #else
500             col = (baij->colmap[in[j]] - 1)/bs;
501 #endif
502             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
503               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
504               col =  in[j];
505             }
506           }
507           else col = in[j];
508           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
509         }
510       }
511     } else {
512       if (!baij->donotstash) {
513         if (roworiented) {
514           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
515         } else {
516           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
517         }
518       }
519     }
520   }
521   PetscFunctionReturn(0);
522 }
523 
524 #define HASH_KEY 0.6180339887
525 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
526 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
527 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
528 #undef __FUNCT__
529 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
530 PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
531 {
532   PetscFunctionBegin;
533   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
534   /* PetscFunctionReturn(0); */
535 }
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
539 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
540 {
541   PetscFunctionBegin;
542   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
543   /* PetscFunctionReturn(0); */
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_MPISBAIJ"
548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
549 {
550   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
551   PetscErrorCode ierr;
552   PetscInt       bs=baij->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
553   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
554 
555   PetscFunctionBegin;
556   for (i=0; i<m; i++) {
557     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
558     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
559     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
560       row = idxm[i] - bsrstart;
561       for (j=0; j<n; j++) {
562         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
563         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
564         if (idxn[j] >= bscstart && idxn[j] < bscend){
565           col = idxn[j] - bscstart;
566           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567         } else {
568           if (!baij->colmap) {
569             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
570           }
571 #if defined (PETSC_USE_CTABLE)
572           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
573           data --;
574 #else
575           data = baij->colmap[idxn[j]/bs]-1;
576 #endif
577           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
578           else {
579             col  = data + idxn[j]%bs;
580             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581           }
582         }
583       }
584     } else {
585       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
586     }
587   }
588  PetscFunctionReturn(0);
589 }
590 
591 #undef __FUNCT__
592 #define __FUNCT__ "MatNorm_MPISBAIJ"
593 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
594 {
595   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
596   PetscErrorCode ierr;
597   PetscReal      sum[2],*lnorm2;
598 
599   PetscFunctionBegin;
600   if (baij->size == 1) {
601     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
602   } else {
603     if (type == NORM_FROBENIUS) {
604       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
605       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
606       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
607       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
608       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
609       /*
610       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
611       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
612       */
613       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
614       /*
615       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
616       PetscSynchronizedFlush(mat->comm); */
617 
618       *norm = sqrt(sum[0] + 2*sum[1]);
619       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
620     } else {
621       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
622     }
623   }
624   PetscFunctionReturn(0);
625 }
626 
627 /*
628   Creates the hash table, and sets the table
629   This table is created only once.
630   If new entried need to be added to the matrix
631   then the hash table has to be destroyed and
632   recreated.
633 */
634 #undef __FUNCT__
635 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
636 PetscErrorCode MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
637 {
638   PetscFunctionBegin;
639   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
640   /* PetscFunctionReturn(0); */
641 }
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
645 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
646 {
647   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
648   PetscErrorCode ierr;
649   PetscInt       nstash,reallocs;
650   InsertMode     addv;
651 
652   PetscFunctionBegin;
653   if (baij->donotstash) {
654     PetscFunctionReturn(0);
655   }
656 
657   /* make sure all processors are either in INSERTMODE or ADDMODE */
658   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
659   if (addv == (ADD_VALUES|INSERT_VALUES)) {
660     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
661   }
662   mat->insertmode = addv; /* in case this processor had no cache */
663 
664   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
665   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
666   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
667   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
668   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
669   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
670   PetscFunctionReturn(0);
671 }
672 
673 #undef __FUNCT__
674 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
675 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
676 {
677   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
678   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
679   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
680   PetscErrorCode ierr;
681   PetscInt       i,j,rstart,ncols,n,flg,bs2=baij->bs2;
682   PetscInt       *row,*col,other_disassembled;
683   PetscTruth     r1,r2,r3;
684   MatScalar      *val;
685   InsertMode     addv = mat->insertmode;
686 
687   PetscFunctionBegin;
688 
689   if (!baij->donotstash) {
690     while (1) {
691       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
692       /*
693       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
694       PetscSynchronizedFlush(mat->comm);
695       */
696       if (!flg) break;
697 
698       for (i=0; i<n;) {
699         /* Now identify the consecutive vals belonging to the same row */
700         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
701         if (j < n) ncols = j-i;
702         else       ncols = n-i;
703         /* Now assemble all these values with a single function call */
704         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
705         i = j;
706       }
707     }
708     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
709     /* Now process the block-stash. Since the values are stashed column-oriented,
710        set the roworiented flag to column oriented, and after MatSetValues()
711        restore the original flags */
712     r1 = baij->roworiented;
713     r2 = a->roworiented;
714     r3 = b->roworiented;
715     baij->roworiented = PETSC_FALSE;
716     a->roworiented    = PETSC_FALSE;
717     b->roworiented    = PETSC_FALSE;
718     while (1) {
719       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
720       if (!flg) break;
721 
722       for (i=0; i<n;) {
723         /* Now identify the consecutive vals belonging to the same row */
724         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
725         if (j < n) ncols = j-i;
726         else       ncols = n-i;
727         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
728         i = j;
729       }
730     }
731     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
732     baij->roworiented = r1;
733     a->roworiented    = r2;
734     b->roworiented    = r3;
735   }
736 
737   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
738   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
739 
740   /* determine if any processor has disassembled, if so we must
741      also disassemble ourselfs, in order that we may reassemble. */
742   /*
743      if nonzero structure of submatrix B cannot change then we know that
744      no processor disassembled thus we can skip this stuff
745   */
746   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
747     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
748     if (mat->was_assembled && !other_disassembled) {
749       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
750     }
751   }
752 
753   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
754     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
755   }
756   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
757   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
758 
759 #if defined(PETSC_USE_BOPT_g)
760   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
761     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
762     baij->ht_total_ct  = 0;
763     baij->ht_insert_ct = 0;
764   }
765 #endif
766   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
767     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
768     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
769     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
770   }
771 
772   if (baij->rowvalues) {
773     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
774     baij->rowvalues = 0;
775   }
776 
777   PetscFunctionReturn(0);
778 }
779 
780 #undef __FUNCT__
781 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
782 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
783 {
784   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
785   PetscErrorCode    ierr;
786   PetscInt          bs = baij->bs;
787   PetscMPIInt       size = baij->size,rank = baij->rank;
788   PetscTruth        iascii,isdraw;
789   PetscViewer       sviewer;
790   PetscViewerFormat format;
791 
792   PetscFunctionBegin;
793   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
794   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
795   if (iascii) {
796     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
797     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
798       MatInfo info;
799       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
800       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
801       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
802               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
803               baij->bs,(int)info.memory);CHKERRQ(ierr);
804       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
805       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
806       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
807       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
808       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
809       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
810       PetscFunctionReturn(0);
811     } else if (format == PETSC_VIEWER_ASCII_INFO) {
812       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
813       PetscFunctionReturn(0);
814     }
815   }
816 
817   if (isdraw) {
818     PetscDraw       draw;
819     PetscTruth isnull;
820     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
821     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
822   }
823 
824   if (size == 1) {
825     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
826     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
827   } else {
828     /* assemble the entire matrix onto first processor. */
829     Mat         A;
830     Mat_SeqSBAIJ *Aloc;
831     Mat_SeqBAIJ *Bloc;
832     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
833     MatScalar   *a;
834 
835     /* Should this be the same type as mat? */
836     if (!rank) {
837       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
838     } else {
839       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
840     }
841     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
842     ierr = MatMPISBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
843     PetscLogObjectParent(mat,A);
844 
845     /* copy over the A part */
846     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
847     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
848     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
849 
850     for (i=0; i<mbs; i++) {
851       rvals[0] = bs*(baij->rstart + i);
852       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
853       for (j=ai[i]; j<ai[i+1]; j++) {
854         col = (baij->cstart+aj[j])*bs;
855         for (k=0; k<bs; k++) {
856           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
857           col++; a += bs;
858         }
859       }
860     }
861     /* copy over the B part */
862     Bloc = (Mat_SeqBAIJ*)baij->B->data;
863     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
864     for (i=0; i<mbs; i++) {
865       rvals[0] = bs*(baij->rstart + i);
866       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
867       for (j=ai[i]; j<ai[i+1]; j++) {
868         col = baij->garray[aj[j]]*bs;
869         for (k=0; k<bs; k++) {
870           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
871           col++; a += bs;
872         }
873       }
874     }
875     ierr = PetscFree(rvals);CHKERRQ(ierr);
876     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
877     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
878     /*
879        Everyone has to call to draw the matrix since the graphics waits are
880        synchronized across all processors that share the PetscDraw object
881     */
882     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
883     if (!rank) {
884       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
885       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
886     }
887     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
888     ierr = MatDestroy(A);CHKERRQ(ierr);
889   }
890   PetscFunctionReturn(0);
891 }
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatView_MPISBAIJ"
895 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
896 {
897   PetscErrorCode ierr;
898   PetscTruth     iascii,isdraw,issocket,isbinary;
899 
900   PetscFunctionBegin;
901   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
902   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
903   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
904   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
905   if (iascii || isdraw || issocket || isbinary) {
906     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
907   } else {
908     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
909   }
910   PetscFunctionReturn(0);
911 }
912 
913 #undef __FUNCT__
914 #define __FUNCT__ "MatDestroy_MPISBAIJ"
915 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
916 {
917   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
918   PetscErrorCode ierr;
919 
920   PetscFunctionBegin;
921 #if defined(PETSC_USE_LOG)
922   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
923 #endif
924   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
925   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
926   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
927   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
928   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
929 #if defined (PETSC_USE_CTABLE)
930   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
931 #else
932   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
933 #endif
934   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
935   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
936   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
937   if (baij->slvec0) {
938     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
939     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
940   }
941   if (baij->slvec1) {
942     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
943     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
944     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
945   }
946   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
947   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
948   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
949   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
950 #if defined(PETSC_USE_MAT_SINGLE)
951   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
952 #endif
953   ierr = PetscFree(baij);CHKERRQ(ierr);
954 
955   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
956   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
957   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
958   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatMult_MPISBAIJ"
964 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
965 {
966   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
967   PetscErrorCode ierr;
968   PetscInt       nt,mbs=a->mbs,bs=a->bs;
969   PetscScalar    *x,*from,zero=0.0;
970 
971   PetscFunctionBegin;
972   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
973   if (nt != A->n) {
974     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
975   }
976   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
977   if (nt != A->m) {
978     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
979   }
980 
981   /* diagonal part */
982   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
983   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
984 
985   /* subdiagonal part */
986   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
987 
988   /* copy x into the vec slvec0 */
989   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
990   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
991   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
992   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
993 
994   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
995   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
996   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
997 
998   /* supperdiagonal part */
999   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1000 
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 #undef __FUNCT__
1005 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1006 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1007 {
1008   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1009   PetscErrorCode ierr;
1010   PetscInt       nt;
1011 
1012   PetscFunctionBegin;
1013   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1014   if (nt != A->n) {
1015     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1016   }
1017   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1018   if (nt != A->m) {
1019     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1020   }
1021 
1022   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1023   /* do diagonal part */
1024   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1025   /* do supperdiagonal part */
1026   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1027   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1028   /* do subdiagonal part */
1029   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1030   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1031   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1032 
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 #undef __FUNCT__
1037 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1038 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1039 {
1040   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1041   PetscErrorCode ierr;
1042   PetscInt       mbs=a->mbs,bs=a->bs;
1043   PetscScalar    *x,*from,zero=0.0;
1044 
1045   PetscFunctionBegin;
1046   /*
1047   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1048   PetscSynchronizedFlush(A->comm);
1049   */
1050   /* diagonal part */
1051   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1052   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1053 
1054   /* subdiagonal part */
1055   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1056 
1057   /* copy x into the vec slvec0 */
1058   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1059   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1060   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1061   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1062 
1063   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1064   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1065   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1066 
1067   /* supperdiagonal part */
1068   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1069 
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1075 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1076 {
1077   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1078   PetscErrorCode ierr;
1079 
1080   PetscFunctionBegin;
1081   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1082   /* do diagonal part */
1083   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1084   /* do supperdiagonal part */
1085   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1086   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1087 
1088   /* do subdiagonal part */
1089   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1090   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1091   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1092 
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1098 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1099 {
1100   PetscErrorCode ierr;
1101 
1102   PetscFunctionBegin;
1103   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1109 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1110 {
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 /*
1119   This only works correctly for square matrices where the subblock A->A is the
1120    diagonal block
1121 */
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1124 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1125 {
1126   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1127   PetscErrorCode ierr;
1128 
1129   PetscFunctionBegin;
1130   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1131   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatScale_MPISBAIJ"
1137 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1138 {
1139   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1140   PetscErrorCode ierr;
1141 
1142   PetscFunctionBegin;
1143   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1144   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1150 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1151 {
1152   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1153   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1154   PetscErrorCode ierr;
1155   PetscInt       bs = mat->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1156   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1157   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1158 
1159   PetscFunctionBegin;
1160   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1161   mat->getrowactive = PETSC_TRUE;
1162 
1163   if (!mat->rowvalues && (idx || v)) {
1164     /*
1165         allocate enough space to hold information from the longest row.
1166     */
1167     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1168     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1169     PetscInt     max = 1,mbs = mat->mbs,tmp;
1170     for (i=0; i<mbs; i++) {
1171       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1172       if (max < tmp) { max = tmp; }
1173     }
1174     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1175     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1176   }
1177 
1178   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1179   lrow = row - brstart;  /* local row index */
1180 
1181   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1182   if (!v)   {pvA = 0; pvB = 0;}
1183   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1184   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1185   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1186   nztot = nzA + nzB;
1187 
1188   cmap  = mat->garray;
1189   if (v  || idx) {
1190     if (nztot) {
1191       /* Sort by increasing column numbers, assuming A and B already sorted */
1192       PetscInt imark = -1;
1193       if (v) {
1194         *v = v_p = mat->rowvalues;
1195         for (i=0; i<nzB; i++) {
1196           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1197           else break;
1198         }
1199         imark = i;
1200         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1201         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1202       }
1203       if (idx) {
1204         *idx = idx_p = mat->rowindices;
1205         if (imark > -1) {
1206           for (i=0; i<imark; i++) {
1207             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1208           }
1209         } else {
1210           for (i=0; i<nzB; i++) {
1211             if (cmap[cworkB[i]/bs] < cstart)
1212               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1213             else break;
1214           }
1215           imark = i;
1216         }
1217         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1218         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1219       }
1220     } else {
1221       if (idx) *idx = 0;
1222       if (v)   *v   = 0;
1223     }
1224   }
1225   *nz = nztot;
1226   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1227   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1233 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1234 {
1235   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1236 
1237   PetscFunctionBegin;
1238   if (baij->getrowactive == PETSC_FALSE) {
1239     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1240   }
1241   baij->getrowactive = PETSC_FALSE;
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1247 PetscErrorCode MatGetBlockSize_MPISBAIJ(Mat mat,PetscInt *bs)
1248 {
1249   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1250 
1251   PetscFunctionBegin;
1252   *bs = baij->bs;
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1258 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1259 {
1260   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1261   PetscErrorCode ierr;
1262 
1263   PetscFunctionBegin;
1264   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1265   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1271 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1272 {
1273   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1274   Mat            A = a->A,B = a->B;
1275   PetscErrorCode ierr;
1276   PetscReal      isend[5],irecv[5];
1277 
1278   PetscFunctionBegin;
1279   info->block_size     = (PetscReal)a->bs;
1280   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1281   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1282   isend[3] = info->memory;  isend[4] = info->mallocs;
1283   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1284   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1285   isend[3] += info->memory;  isend[4] += info->mallocs;
1286   if (flag == MAT_LOCAL) {
1287     info->nz_used      = isend[0];
1288     info->nz_allocated = isend[1];
1289     info->nz_unneeded  = isend[2];
1290     info->memory       = isend[3];
1291     info->mallocs      = isend[4];
1292   } else if (flag == MAT_GLOBAL_MAX) {
1293     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1294     info->nz_used      = irecv[0];
1295     info->nz_allocated = irecv[1];
1296     info->nz_unneeded  = irecv[2];
1297     info->memory       = irecv[3];
1298     info->mallocs      = irecv[4];
1299   } else if (flag == MAT_GLOBAL_SUM) {
1300     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1301     info->nz_used      = irecv[0];
1302     info->nz_allocated = irecv[1];
1303     info->nz_unneeded  = irecv[2];
1304     info->memory       = irecv[3];
1305     info->mallocs      = irecv[4];
1306   } else {
1307     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",flag);
1308   }
1309   info->rows_global       = (PetscReal)A->M;
1310   info->columns_global    = (PetscReal)A->N;
1311   info->rows_local        = (PetscReal)A->m;
1312   info->columns_local     = (PetscReal)A->N;
1313   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1314   info->fill_ratio_needed = 0;
1315   info->factor_mallocs    = 0;
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1321 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1322 {
1323   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1324   PetscErrorCode ierr;
1325 
1326   PetscFunctionBegin;
1327   switch (op) {
1328   case MAT_NO_NEW_NONZERO_LOCATIONS:
1329   case MAT_YES_NEW_NONZERO_LOCATIONS:
1330   case MAT_COLUMNS_UNSORTED:
1331   case MAT_COLUMNS_SORTED:
1332   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1333   case MAT_KEEP_ZEROED_ROWS:
1334   case MAT_NEW_NONZERO_LOCATION_ERR:
1335     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1336     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1337     break;
1338   case MAT_ROW_ORIENTED:
1339     a->roworiented = PETSC_TRUE;
1340     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1341     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1342     break;
1343   case MAT_ROWS_SORTED:
1344   case MAT_ROWS_UNSORTED:
1345   case MAT_YES_NEW_DIAGONALS:
1346     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1347     break;
1348   case MAT_COLUMN_ORIENTED:
1349     a->roworiented = PETSC_FALSE;
1350     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1351     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1352     break;
1353   case MAT_IGNORE_OFF_PROC_ENTRIES:
1354     a->donotstash = PETSC_TRUE;
1355     break;
1356   case MAT_NO_NEW_DIAGONALS:
1357     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1358   case MAT_USE_HASH_TABLE:
1359     a->ht_flag = PETSC_TRUE;
1360     break;
1361   case MAT_NOT_SYMMETRIC:
1362   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1363   case MAT_HERMITIAN:
1364     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1365   case MAT_SYMMETRIC:
1366   case MAT_STRUCTURALLY_SYMMETRIC:
1367   case MAT_NOT_HERMITIAN:
1368   case MAT_SYMMETRY_ETERNAL:
1369   case MAT_NOT_SYMMETRY_ETERNAL:
1370     break;
1371   default:
1372     SETERRQ(PETSC_ERR_SUP,"unknown option");
1373   }
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1379 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1380 {
1381   PetscErrorCode ierr;
1382   PetscFunctionBegin;
1383   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1389 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1390 {
1391   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1392   Mat            a = baij->A,b = baij->B;
1393   PetscErrorCode ierr;
1394   PetscInt       s1,s2,s3;
1395 
1396   PetscFunctionBegin;
1397   if (ll != rr) {
1398     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1399   }
1400   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1401   if (rr) {
1402     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1403     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1404     /* Overlap communication with computation. */
1405     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1406     /*} if (ll) { */
1407     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1408     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1409     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1410     /* } */
1411   /* scale  the diagonal block */
1412   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1413 
1414   /* if (rr) { */
1415     /* Do a scatter end and then right scale the off-diagonal block */
1416     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1417     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1418   }
1419 
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1425 PetscErrorCode MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1426 {
1427   PetscFunctionBegin;
1428   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1433 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1434 {
1435   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1436   MPI_Comm          comm = A->comm;
1437   static PetscTruth called = PETSC_FALSE;
1438   PetscErrorCode    ierr;
1439 
1440   PetscFunctionBegin;
1441   if (!a->rank) {
1442     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1443   }
1444   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1445   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1446   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1447   PetscFunctionReturn(0);
1448 }
1449 
1450 #undef __FUNCT__
1451 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1452 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1453 {
1454   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1455   PetscErrorCode ierr;
1456 
1457   PetscFunctionBegin;
1458   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1459   PetscFunctionReturn(0);
1460 }
1461 
1462 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatEqual_MPISBAIJ"
1466 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1467 {
1468   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1469   Mat            a,b,c,d;
1470   PetscTruth     flg;
1471   PetscErrorCode ierr;
1472 
1473   PetscFunctionBegin;
1474   a = matA->A; b = matA->B;
1475   c = matB->A; d = matB->B;
1476 
1477   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1478   if (flg == PETSC_TRUE) {
1479     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1480   }
1481   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1487 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1488 {
1489   PetscErrorCode ierr;
1490 
1491   PetscFunctionBegin;
1492   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 #undef __FUNCT__
1497 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1498 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1499 {
1500   PetscErrorCode ierr;
1501   PetscInt       i;
1502   PetscTruth     flg;
1503 
1504   PetscFunctionBegin;
1505   for (i=0; i<n; i++) {
1506     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1507     if (!flg) {
1508       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1509     }
1510   }
1511   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 
1516 /* -------------------------------------------------------------------*/
1517 static struct _MatOps MatOps_Values = {
1518        MatSetValues_MPISBAIJ,
1519        MatGetRow_MPISBAIJ,
1520        MatRestoreRow_MPISBAIJ,
1521        MatMult_MPISBAIJ,
1522 /* 4*/ MatMultAdd_MPISBAIJ,
1523        MatMultTranspose_MPISBAIJ,
1524        MatMultTransposeAdd_MPISBAIJ,
1525        0,
1526        0,
1527        0,
1528 /*10*/ 0,
1529        0,
1530        0,
1531        MatRelax_MPISBAIJ,
1532        MatTranspose_MPISBAIJ,
1533 /*15*/ MatGetInfo_MPISBAIJ,
1534        MatEqual_MPISBAIJ,
1535        MatGetDiagonal_MPISBAIJ,
1536        MatDiagonalScale_MPISBAIJ,
1537        MatNorm_MPISBAIJ,
1538 /*20*/ MatAssemblyBegin_MPISBAIJ,
1539        MatAssemblyEnd_MPISBAIJ,
1540        0,
1541        MatSetOption_MPISBAIJ,
1542        MatZeroEntries_MPISBAIJ,
1543 /*25*/ MatZeroRows_MPISBAIJ,
1544        0,
1545        0,
1546        0,
1547        0,
1548 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1549        0,
1550        0,
1551        0,
1552        0,
1553 /*35*/ MatDuplicate_MPISBAIJ,
1554        0,
1555        0,
1556        0,
1557        0,
1558 /*40*/ 0,
1559        MatGetSubMatrices_MPISBAIJ,
1560        MatIncreaseOverlap_MPISBAIJ,
1561        MatGetValues_MPISBAIJ,
1562        0,
1563 /*45*/ MatPrintHelp_MPISBAIJ,
1564        MatScale_MPISBAIJ,
1565        0,
1566        0,
1567        0,
1568 /*50*/ MatGetBlockSize_MPISBAIJ,
1569        0,
1570        0,
1571        0,
1572        0,
1573 /*55*/ 0,
1574        0,
1575        MatSetUnfactored_MPISBAIJ,
1576        0,
1577        MatSetValuesBlocked_MPISBAIJ,
1578 /*60*/ 0,
1579        0,
1580        0,
1581        MatGetPetscMaps_Petsc,
1582        0,
1583 /*65*/ 0,
1584        0,
1585        0,
1586        0,
1587        0,
1588 /*70*/ MatGetRowMax_MPISBAIJ,
1589        0,
1590        0,
1591        0,
1592        0,
1593 /*75*/ 0,
1594        0,
1595        0,
1596        0,
1597        0,
1598 /*80*/ 0,
1599        0,
1600        0,
1601        0,
1602        MatLoad_MPISBAIJ,
1603 /*85*/ 0,
1604        0,
1605        0,
1606        0,
1607        0,
1608 /*90*/ 0,
1609        0,
1610        0,
1611        0,
1612        0,
1613 /*95*/ 0,
1614        0,
1615        0,
1616        0};
1617 
1618 
1619 EXTERN_C_BEGIN
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1622 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1623 {
1624   PetscFunctionBegin;
1625   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1626   *iscopy = PETSC_FALSE;
1627   PetscFunctionReturn(0);
1628 }
1629 EXTERN_C_END
1630 
1631 EXTERN_C_BEGIN
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1634 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1635 {
1636   Mat_MPISBAIJ   *b;
1637   PetscErrorCode ierr;
1638   PetscInt       i,mbs,Mbs;
1639 
1640   PetscFunctionBegin;
1641   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1642 
1643   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1644   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1645   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1646   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1647   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1648   if (d_nnz) {
1649     for (i=0; i<B->m/bs; i++) {
1650       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1651     }
1652   }
1653   if (o_nnz) {
1654     for (i=0; i<B->m/bs; i++) {
1655       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1656     }
1657   }
1658   B->preallocated = PETSC_TRUE;
1659   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1660   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1661   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1662   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1663 
1664   b   = (Mat_MPISBAIJ*)B->data;
1665   mbs = B->m/bs;
1666   Mbs = B->M/bs;
1667   if (mbs*bs != B->m) {
1668     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
1669   }
1670 
1671   b->bs  = bs;
1672   b->bs2 = bs*bs;
1673   b->mbs = mbs;
1674   b->nbs = mbs;
1675   b->Mbs = Mbs;
1676   b->Nbs = Mbs;
1677 
1678   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1679   b->rowners[0]    = 0;
1680   for (i=2; i<=b->size; i++) {
1681     b->rowners[i] += b->rowners[i-1];
1682   }
1683   b->rstart    = b->rowners[b->rank];
1684   b->rend      = b->rowners[b->rank+1];
1685   b->cstart    = b->rstart;
1686   b->cend      = b->rend;
1687   for (i=0; i<=b->size; i++) {
1688     b->rowners_bs[i] = b->rowners[i]*bs;
1689   }
1690   b->rstart_bs = b-> rstart*bs;
1691   b->rend_bs   = b->rend*bs;
1692 
1693   b->cstart_bs = b->cstart*bs;
1694   b->cend_bs   = b->cend*bs;
1695 
1696   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1697   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1698   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1699   PetscLogObjectParent(B,b->A);
1700 
1701   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1702   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1703   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1704   PetscLogObjectParent(B,b->B);
1705 
1706   /* build cache for off array entries formed */
1707   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1708 
1709   PetscFunctionReturn(0);
1710 }
1711 EXTERN_C_END
1712 
1713 /*MC
1714    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1715    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1716 
1717    Options Database Keys:
1718 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1719 
1720   Level: beginner
1721 
1722 .seealso: MatCreateMPISBAIJ
1723 M*/
1724 
1725 EXTERN_C_BEGIN
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatCreate_MPISBAIJ"
1728 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1729 {
1730   Mat_MPISBAIJ   *b;
1731   PetscErrorCode ierr;
1732   PetscTruth     flg;
1733 
1734   PetscFunctionBegin;
1735 
1736   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1737   B->data = (void*)b;
1738   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1739   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1740 
1741   B->ops->destroy    = MatDestroy_MPISBAIJ;
1742   B->ops->view       = MatView_MPISBAIJ;
1743   B->mapping    = 0;
1744   B->factor     = 0;
1745   B->assembled  = PETSC_FALSE;
1746 
1747   B->insertmode = NOT_SET_VALUES;
1748   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1749   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1750 
1751   /* build local table of row and column ownerships */
1752   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1753   b->cowners    = b->rowners + b->size + 2;
1754   b->rowners_bs = b->cowners + b->size + 2;
1755   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1756 
1757   /* build cache for off array entries formed */
1758   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1759   b->donotstash  = PETSC_FALSE;
1760   b->colmap      = PETSC_NULL;
1761   b->garray      = PETSC_NULL;
1762   b->roworiented = PETSC_TRUE;
1763 
1764 #if defined(PETSC_USE_MAT_SINGLE)
1765   /* stuff for MatSetValues_XXX in single precision */
1766   b->setvalueslen     = 0;
1767   b->setvaluescopy    = PETSC_NULL;
1768 #endif
1769 
1770   /* stuff used in block assembly */
1771   b->barray       = 0;
1772 
1773   /* stuff used for matrix vector multiply */
1774   b->lvec         = 0;
1775   b->Mvctx        = 0;
1776   b->slvec0       = 0;
1777   b->slvec0b      = 0;
1778   b->slvec1       = 0;
1779   b->slvec1a      = 0;
1780   b->slvec1b      = 0;
1781   b->sMvctx       = 0;
1782 
1783   /* stuff for MatGetRow() */
1784   b->rowindices   = 0;
1785   b->rowvalues    = 0;
1786   b->getrowactive = PETSC_FALSE;
1787 
1788   /* hash table stuff */
1789   b->ht           = 0;
1790   b->hd           = 0;
1791   b->ht_size      = 0;
1792   b->ht_flag      = PETSC_FALSE;
1793   b->ht_fact      = 0;
1794   b->ht_total_ct  = 0;
1795   b->ht_insert_ct = 0;
1796 
1797   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1798   if (flg) {
1799     PetscReal fact = 1.39;
1800     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1801     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1802     if (fact <= 1.0) fact = 1.39;
1803     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1804     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1805   }
1806   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1807                                      "MatStoreValues_MPISBAIJ",
1808                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1809   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1810                                      "MatRetrieveValues_MPISBAIJ",
1811                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1812   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1813                                      "MatGetDiagonalBlock_MPISBAIJ",
1814                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1815   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1816                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1817                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1818   B->symmetric                  = PETSC_TRUE;
1819   B->structurally_symmetric     = PETSC_TRUE;
1820   B->symmetric_set              = PETSC_TRUE;
1821   B->structurally_symmetric_set = PETSC_TRUE;
1822   PetscFunctionReturn(0);
1823 }
1824 EXTERN_C_END
1825 
1826 /*MC
1827    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1828 
1829    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1830    and MATMPISBAIJ otherwise.
1831 
1832    Options Database Keys:
1833 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1834 
1835   Level: beginner
1836 
1837 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1838 M*/
1839 
1840 EXTERN_C_BEGIN
1841 #undef __FUNCT__
1842 #define __FUNCT__ "MatCreate_SBAIJ"
1843 PetscErrorCode MatCreate_SBAIJ(Mat A)
1844 {
1845   PetscErrorCode ierr;
1846   PetscMPIInt    size;
1847 
1848   PetscFunctionBegin;
1849   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1850   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1851   if (size == 1) {
1852     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1853   } else {
1854     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1855   }
1856   PetscFunctionReturn(0);
1857 }
1858 EXTERN_C_END
1859 
1860 #undef __FUNCT__
1861 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1862 /*@C
1863    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1864    the user should preallocate the matrix storage by setting the parameters
1865    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1866    performance can be increased by more than a factor of 50.
1867 
1868    Collective on Mat
1869 
1870    Input Parameters:
1871 +  A - the matrix
1872 .  bs   - size of blockk
1873 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1874            submatrix  (same for all local rows)
1875 .  d_nnz - array containing the number of block nonzeros in the various block rows
1876            in the upper triangular and diagonal part of the in diagonal portion of the local
1877            (possibly different for each block row) or PETSC_NULL.  You must leave room
1878            for the diagonal entry even if it is zero.
1879 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1880            submatrix (same for all local rows).
1881 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1882            off-diagonal portion of the local submatrix (possibly different for
1883            each block row) or PETSC_NULL.
1884 
1885 
1886    Options Database Keys:
1887 .   -mat_no_unroll - uses code that does not unroll the loops in the
1888                      block calculations (much slower)
1889 .   -mat_block_size - size of the blocks to use
1890 
1891    Notes:
1892 
1893    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1894    than it must be used on all processors that share the object for that argument.
1895 
1896    Storage Information:
1897    For a square global matrix we define each processor's diagonal portion
1898    to be its local rows and the corresponding columns (a square submatrix);
1899    each processor's off-diagonal portion encompasses the remainder of the
1900    local matrix (a rectangular submatrix).
1901 
1902    The user can specify preallocated storage for the diagonal part of
1903    the local submatrix with either d_nz or d_nnz (not both).  Set
1904    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1905    memory allocation.  Likewise, specify preallocated storage for the
1906    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1907 
1908    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1909    the figure below we depict these three local rows and all columns (0-11).
1910 
1911 .vb
1912            0 1 2 3 4 5 6 7 8 9 10 11
1913           -------------------
1914    row 3  |  o o o d d d o o o o o o
1915    row 4  |  o o o d d d o o o o o o
1916    row 5  |  o o o d d d o o o o o o
1917           -------------------
1918 .ve
1919 
1920    Thus, any entries in the d locations are stored in the d (diagonal)
1921    submatrix, and any entries in the o locations are stored in the
1922    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1923    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1924 
1925    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1926    plus the diagonal part of the d matrix,
1927    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1928    In general, for PDE problems in which most nonzeros are near the diagonal,
1929    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1930    or you will get TERRIBLE performance; see the users' manual chapter on
1931    matrices.
1932 
1933    Level: intermediate
1934 
1935 .keywords: matrix, block, aij, compressed row, sparse, parallel
1936 
1937 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1938 @*/
1939 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1940 {
1941   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1942 
1943   PetscFunctionBegin;
1944   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1945   if (f) {
1946     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1947   }
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatCreateMPISBAIJ"
1953 /*@C
1954    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1955    (block compressed row).  For good matrix assembly performance
1956    the user should preallocate the matrix storage by setting the parameters
1957    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1958    performance can be increased by more than a factor of 50.
1959 
1960    Collective on MPI_Comm
1961 
1962    Input Parameters:
1963 +  comm - MPI communicator
1964 .  bs   - size of blockk
1965 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1966            This value should be the same as the local size used in creating the
1967            y vector for the matrix-vector product y = Ax.
1968 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1969            This value should be the same as the local size used in creating the
1970            x vector for the matrix-vector product y = Ax.
1971 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1972 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1973 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1974            submatrix  (same for all local rows)
1975 .  d_nnz - array containing the number of block nonzeros in the various block rows
1976            in the upper triangular portion of the in diagonal portion of the local
1977            (possibly different for each block block row) or PETSC_NULL.
1978            You must leave room for the diagonal entry even if it is zero.
1979 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1980            submatrix (same for all local rows).
1981 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1982            off-diagonal portion of the local submatrix (possibly different for
1983            each block row) or PETSC_NULL.
1984 
1985    Output Parameter:
1986 .  A - the matrix
1987 
1988    Options Database Keys:
1989 .   -mat_no_unroll - uses code that does not unroll the loops in the
1990                      block calculations (much slower)
1991 .   -mat_block_size - size of the blocks to use
1992 .   -mat_mpi - use the parallel matrix data structures even on one processor
1993                (defaults to using SeqBAIJ format on one processor)
1994 
1995    Notes:
1996    The user MUST specify either the local or global matrix dimensions
1997    (possibly both).
1998 
1999    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2000    than it must be used on all processors that share the object for that argument.
2001 
2002    Storage Information:
2003    For a square global matrix we define each processor's diagonal portion
2004    to be its local rows and the corresponding columns (a square submatrix);
2005    each processor's off-diagonal portion encompasses the remainder of the
2006    local matrix (a rectangular submatrix).
2007 
2008    The user can specify preallocated storage for the diagonal part of
2009    the local submatrix with either d_nz or d_nnz (not both).  Set
2010    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2011    memory allocation.  Likewise, specify preallocated storage for the
2012    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2013 
2014    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2015    the figure below we depict these three local rows and all columns (0-11).
2016 
2017 .vb
2018            0 1 2 3 4 5 6 7 8 9 10 11
2019           -------------------
2020    row 3  |  o o o d d d o o o o o o
2021    row 4  |  o o o d d d o o o o o o
2022    row 5  |  o o o d d d o o o o o o
2023           -------------------
2024 .ve
2025 
2026    Thus, any entries in the d locations are stored in the d (diagonal)
2027    submatrix, and any entries in the o locations are stored in the
2028    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2029    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2030 
2031    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2032    plus the diagonal part of the d matrix,
2033    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2034    In general, for PDE problems in which most nonzeros are near the diagonal,
2035    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2036    or you will get TERRIBLE performance; see the users' manual chapter on
2037    matrices.
2038 
2039    Level: intermediate
2040 
2041 .keywords: matrix, block, aij, compressed row, sparse, parallel
2042 
2043 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2044 @*/
2045 
2046 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2047 {
2048   PetscErrorCode ierr;
2049   PetscMPIInt    size;
2050 
2051   PetscFunctionBegin;
2052   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2053   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2054   if (size > 1) {
2055     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2056     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2057   } else {
2058     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2059     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2060   }
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2067 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2068 {
2069   Mat            mat;
2070   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2071   PetscErrorCode ierr;
2072   PetscInt       len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs;
2073   PetscScalar    *array;
2074 
2075   PetscFunctionBegin;
2076   *newmat       = 0;
2077   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2078   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2079   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2080 
2081   mat->factor       = matin->factor;
2082   mat->preallocated = PETSC_TRUE;
2083   mat->assembled    = PETSC_TRUE;
2084   mat->insertmode   = NOT_SET_VALUES;
2085 
2086   a = (Mat_MPISBAIJ*)mat->data;
2087   a->bs  = oldmat->bs;
2088   a->bs2 = oldmat->bs2;
2089   a->mbs = oldmat->mbs;
2090   a->nbs = oldmat->nbs;
2091   a->Mbs = oldmat->Mbs;
2092   a->Nbs = oldmat->Nbs;
2093 
2094   a->rstart       = oldmat->rstart;
2095   a->rend         = oldmat->rend;
2096   a->cstart       = oldmat->cstart;
2097   a->cend         = oldmat->cend;
2098   a->size         = oldmat->size;
2099   a->rank         = oldmat->rank;
2100   a->donotstash   = oldmat->donotstash;
2101   a->roworiented  = oldmat->roworiented;
2102   a->rowindices   = 0;
2103   a->rowvalues    = 0;
2104   a->getrowactive = PETSC_FALSE;
2105   a->barray       = 0;
2106   a->rstart_bs    = oldmat->rstart_bs;
2107   a->rend_bs      = oldmat->rend_bs;
2108   a->cstart_bs    = oldmat->cstart_bs;
2109   a->cend_bs      = oldmat->cend_bs;
2110 
2111   /* hash table stuff */
2112   a->ht           = 0;
2113   a->hd           = 0;
2114   a->ht_size      = 0;
2115   a->ht_flag      = oldmat->ht_flag;
2116   a->ht_fact      = oldmat->ht_fact;
2117   a->ht_total_ct  = 0;
2118   a->ht_insert_ct = 0;
2119 
2120   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2121   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2122   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2123   if (oldmat->colmap) {
2124 #if defined (PETSC_USE_CTABLE)
2125     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2126 #else
2127     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2128     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
2129     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2130 #endif
2131   } else a->colmap = 0;
2132 
2133   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2134     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2135     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2136     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2137   } else a->garray = 0;
2138 
2139   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2140   PetscLogObjectParent(mat,a->lvec);
2141   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2142   PetscLogObjectParent(mat,a->Mvctx);
2143 
2144   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2145   PetscLogObjectParent(mat,a->slvec0);
2146   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2147   PetscLogObjectParent(mat,a->slvec1);
2148 
2149   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2150   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2151   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2152   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2153   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2154   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2155   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2156   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2157   PetscLogObjectParent(mat,a->slvec0);
2158   PetscLogObjectParent(mat,a->slvec1);
2159   PetscLogObjectParent(mat,a->slvec0b);
2160   PetscLogObjectParent(mat,a->slvec1a);
2161   PetscLogObjectParent(mat,a->slvec1b);
2162 
2163   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2164   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2165   a->sMvctx = oldmat->sMvctx;
2166   PetscLogObjectParent(mat,a->sMvctx);
2167 
2168   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2169   PetscLogObjectParent(mat,a->A);
2170   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2171   PetscLogObjectParent(mat,a->B);
2172   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2173   *newmat = mat;
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 #include "petscsys.h"
2178 
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatLoad_MPISBAIJ"
2181 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2182 {
2183   Mat            A;
2184   PetscErrorCode ierr;
2185   PetscInt       i,nz,j,rstart,rend,fd;
2186   PetscScalar    *vals,*buf;
2187   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2188   MPI_Status     status;
2189   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2190   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2191   PetscInt       *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2192   PetscInt       bs=1,Mbs,mbs,extra_rows;
2193   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2194   PetscInt       dcount,kmax,k,nzcount,tmp;
2195 
2196   PetscFunctionBegin;
2197   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2198 
2199   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2200   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2201   if (!rank) {
2202     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2203     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2204     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2205     if (header[3] < 0) {
2206       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2207     }
2208   }
2209 
2210   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2211   M = header[1]; N = header[2];
2212 
2213   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2214 
2215   /*
2216      This code adds extra rows to make sure the number of rows is
2217      divisible by the blocksize
2218   */
2219   Mbs        = M/bs;
2220   extra_rows = bs - M + bs*(Mbs);
2221   if (extra_rows == bs) extra_rows = 0;
2222   else                  Mbs++;
2223   if (extra_rows &&!rank) {
2224     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2225   }
2226 
2227   /* determine ownership of all rows */
2228   mbs        = Mbs/size + ((Mbs % size) > rank);
2229   m          = mbs*bs;
2230   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2231   browners   = rowners + size + 1;
2232   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2233   rowners[0] = 0;
2234   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2235   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2236   rstart = rowners[rank];
2237   rend   = rowners[rank+1];
2238 
2239   /* distribute row lengths to all processors */
2240   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2241   if (!rank) {
2242     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2243     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2244     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2245     ierr = PetscMalloc(size*sizeof(PetscInt),&sndcounts);CHKERRQ(ierr);
2246     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2247     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2248     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2249   } else {
2250     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2251   }
2252 
2253   if (!rank) {   /* procs[0] */
2254     /* calculate the number of nonzeros on each processor */
2255     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2256     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2257     for (i=0; i<size; i++) {
2258       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2259         procsnz[i] += rowlengths[j];
2260       }
2261     }
2262     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2263 
2264     /* determine max buffer needed and allocate it */
2265     maxnz = 0;
2266     for (i=0; i<size; i++) {
2267       maxnz = PetscMax(maxnz,procsnz[i]);
2268     }
2269     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2270 
2271     /* read in my part of the matrix column indices  */
2272     nz     = procsnz[0];
2273     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2274     mycols = ibuf;
2275     if (size == 1)  nz -= extra_rows;
2276     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2277     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2278 
2279     /* read in every ones (except the last) and ship off */
2280     for (i=1; i<size-1; i++) {
2281       nz   = procsnz[i];
2282       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2283       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2284     }
2285     /* read in the stuff for the last proc */
2286     if (size != 1) {
2287       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2288       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2289       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2290       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2291     }
2292     ierr = PetscFree(cols);CHKERRQ(ierr);
2293   } else {  /* procs[i], i>0 */
2294     /* determine buffer space needed for message */
2295     nz = 0;
2296     for (i=0; i<m; i++) {
2297       nz += locrowlens[i];
2298     }
2299     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2300     mycols = ibuf;
2301     /* receive message of column indices*/
2302     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2303     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2304     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2305   }
2306 
2307   /* loop over local rows, determining number of off diagonal entries */
2308   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2309   odlens   = dlens + (rend-rstart);
2310   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2311   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2312   masked1  = mask    + Mbs;
2313   masked2  = masked1 + Mbs;
2314   rowcount = 0; nzcount = 0;
2315   for (i=0; i<mbs; i++) {
2316     dcount  = 0;
2317     odcount = 0;
2318     for (j=0; j<bs; j++) {
2319       kmax = locrowlens[rowcount];
2320       for (k=0; k<kmax; k++) {
2321         tmp = mycols[nzcount++]/bs; /* block col. index */
2322         if (!mask[tmp]) {
2323           mask[tmp] = 1;
2324           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2325           else masked1[dcount++] = tmp; /* entry in diag portion */
2326         }
2327       }
2328       rowcount++;
2329     }
2330 
2331     dlens[i]  = dcount;  /* d_nzz[i] */
2332     odlens[i] = odcount; /* o_nzz[i] */
2333 
2334     /* zero out the mask elements we set */
2335     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2336     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2337   }
2338 
2339   /* create our matrix */
2340   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2341   ierr = MatSetType(A,type);CHKERRQ(ierr);
2342   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2343   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2344 
2345   if (!rank) {
2346     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2347     /* read in my part of the matrix numerical values  */
2348     nz = procsnz[0];
2349     vals = buf;
2350     mycols = ibuf;
2351     if (size == 1)  nz -= extra_rows;
2352     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2353     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2354 
2355     /* insert into matrix */
2356     jj      = rstart*bs;
2357     for (i=0; i<m; i++) {
2358       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2359       mycols += locrowlens[i];
2360       vals   += locrowlens[i];
2361       jj++;
2362     }
2363 
2364     /* read in other processors (except the last one) and ship out */
2365     for (i=1; i<size-1; i++) {
2366       nz   = procsnz[i];
2367       vals = buf;
2368       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2369       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2370     }
2371     /* the last proc */
2372     if (size != 1){
2373       nz   = procsnz[i] - extra_rows;
2374       vals = buf;
2375       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2376       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2377       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2378     }
2379     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2380 
2381   } else {
2382     /* receive numeric values */
2383     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2384 
2385     /* receive message of values*/
2386     vals   = buf;
2387     mycols = ibuf;
2388     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2389     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2390     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2391 
2392     /* insert into matrix */
2393     jj      = rstart*bs;
2394     for (i=0; i<m; i++) {
2395       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2396       mycols += locrowlens[i];
2397       vals   += locrowlens[i];
2398       jj++;
2399     }
2400   }
2401 
2402   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2403   ierr = PetscFree(buf);CHKERRQ(ierr);
2404   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2405   ierr = PetscFree(rowners);CHKERRQ(ierr);
2406   ierr = PetscFree(dlens);CHKERRQ(ierr);
2407   ierr = PetscFree(mask);CHKERRQ(ierr);
2408   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2409   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2410   *newmat = A;
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 #undef __FUNCT__
2415 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2416 /*@
2417    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2418 
2419    Input Parameters:
2420 .  mat  - the matrix
2421 .  fact - factor
2422 
2423    Collective on Mat
2424 
2425    Level: advanced
2426 
2427   Notes:
2428    This can also be set by the command line option: -mat_use_hash_table fact
2429 
2430 .keywords: matrix, hashtable, factor, HT
2431 
2432 .seealso: MatSetOption()
2433 @*/
2434 PetscErrorCode MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2435 {
2436   PetscFunctionBegin;
2437   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
2438   /* PetscFunctionReturn(0); */
2439 }
2440 
2441 #undef __FUNCT__
2442 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2443 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2444 {
2445   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2446   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2447   PetscReal      atmp;
2448   PetscReal      *work,*svalues,*rvalues;
2449   PetscErrorCode ierr;
2450   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2451   PetscMPIInt    rank,size;
2452   PetscInt       *rowners_bs,dest,count,source;
2453   PetscScalar    *va;
2454   MatScalar      *ba;
2455   MPI_Status     stat;
2456 
2457   PetscFunctionBegin;
2458   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2459   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2460 
2461   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2462   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2463 
2464   bs   = a->bs;
2465   mbs  = a->mbs;
2466   Mbs  = a->Mbs;
2467   ba   = b->a;
2468   bi   = b->i;
2469   bj   = b->j;
2470   /*
2471   PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2472   PetscSynchronizedFlush(A->comm);
2473   */
2474 
2475   /* find ownerships */
2476   rowners_bs = a->rowners_bs;
2477   /*
2478   if (!rank){
2479     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2480   }
2481   */
2482 
2483   /* each proc creates an array to be distributed */
2484   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2485   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2486 
2487   /* row_max for B */
2488   if (rank != size-1){
2489     for (i=0; i<mbs; i++) {
2490       ncols = bi[1] - bi[0]; bi++;
2491       brow  = bs*i;
2492       for (j=0; j<ncols; j++){
2493         bcol = bs*(*bj);
2494         for (kcol=0; kcol<bs; kcol++){
2495           col = bcol + kcol;                 /* local col index */
2496           col += rowners_bs[rank+1];      /* global col index */
2497           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2498           for (krow=0; krow<bs; krow++){
2499             atmp = PetscAbsScalar(*ba); ba++;
2500             row = brow + krow;    /* local row index */
2501             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2502             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2503             if (work[col] < atmp) work[col] = atmp;
2504           }
2505         }
2506         bj++;
2507       }
2508     }
2509     /*
2510       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2511       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2512       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2513       */
2514 
2515     /* send values to its owners */
2516     for (dest=rank+1; dest<size; dest++){
2517       svalues = work + rowners_bs[dest];
2518       count   = rowners_bs[dest+1]-rowners_bs[dest];
2519       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2520       /*
2521       PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2522       PetscSynchronizedFlush(A->comm);
2523       */
2524     }
2525   }
2526 
2527   /* receive values */
2528   if (rank){
2529     rvalues = work;
2530     count   = rowners_bs[rank+1]-rowners_bs[rank];
2531     for (source=0; source<rank; source++){
2532       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2533       /* process values */
2534       for (i=0; i<count; i++){
2535         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2536       }
2537       /*
2538       PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2539       PetscSynchronizedFlush(A->comm);
2540       */
2541     }
2542   }
2543 
2544   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2545   ierr = PetscFree(work);CHKERRQ(ierr);
2546   PetscFunctionReturn(0);
2547 }
2548 
2549 #undef __FUNCT__
2550 #define __FUNCT__ "MatRelax_MPISBAIJ"
2551 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2552 {
2553   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2554   PetscErrorCode ierr;
2555   PetscInt       mbs=mat->mbs,bs=mat->bs;
2556   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2557   Vec            bb1;
2558 
2559   PetscFunctionBegin;
2560   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2561   if (bs > 1)
2562     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2563 
2564   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2565     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2566       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2567       its--;
2568     }
2569 
2570     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2571     while (its--){
2572 
2573       /* lower triangular part: slvec0b = - B^T*xx */
2574       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2575 
2576       /* copy xx into slvec0a */
2577       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2578       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2579       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2580       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2581 
2582       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2583 
2584       /* copy bb into slvec1a */
2585       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2586       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2587       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2588       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2589 
2590       /* set slvec1b = 0 */
2591       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2592 
2593       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2594       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2595       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2596       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2597 
2598       /* upper triangular part: bb1 = bb1 - B*x */
2599       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2600 
2601       /* local diagonal sweep */
2602       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2603     }
2604     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2605   } else {
2606     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2607   }
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 #undef __FUNCT__
2612 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2613 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2614 {
2615   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2616   PetscErrorCode ierr;
2617   PetscScalar    mone=-1.0;
2618   Vec            lvec1,bb1;
2619 
2620   PetscFunctionBegin;
2621   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2622   if (mat->bs > 1)
2623     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2624 
2625   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2626     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2627       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2628       its--;
2629     }
2630 
2631     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2632     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2633     while (its--){
2634       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2635 
2636       /* lower diagonal part: bb1 = bb - B^T*xx */
2637       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2638       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2639 
2640       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2641       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2642       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2643 
2644       /* upper diagonal part: bb1 = bb1 - B*x */
2645       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2646       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2647 
2648       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2649 
2650       /* diagonal sweep */
2651       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2652     }
2653     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2654     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2655   } else {
2656     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2657   }
2658   PetscFunctionReturn(0);
2659 }
2660 
2661