
#include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the ..._MatScalar() versions of the routines below
   do not exist as separate functions. Otherwise the ..._MatScalar() routines take
   matrix entries already in single precision (MatScalar) and insert them directly
   into the single precision data structures, while MatSetValues_MPISBAIJ() and
   MatSetValuesBlocked_MPISBAIJ() first convert the PetscScalar entries into single
   precision and then call the corresponding ..._MatScalar() routine to store them.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
#endif
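
/*
   A minimal sketch (illustration only, not additional API) of the pattern the
   aliases above support: with PETSC_USE_MAT_SINGLE the public function demotes
   the PetscScalar input into a MatScalar (single precision) buffer and then
   forwards to the ..._MatScalar() variant,

       for (i=0; i<N; i++) vsingle[i] = v[i];
       ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);

   which is exactly what MatSetValues_MPISBAIJ() below does; without
   PETSC_USE_MAT_SINGLE the #defines above make the two names identical.
*/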

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#define CHUNKSIZE  10

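/*
   MatSetValues_SeqSBAIJ_A_Private() and MatSetValues_SeqSBAIJ_B_Private()
   insert or add one scalar entry into the local diagonal (A, SBAIJ) or
   off-diagonal (B, BAIJ) block: they locate the block row, binary search the
   sorted block column indices, and update the entry in place if its block
   already exists; otherwise they shift the later entries of the row up and,
   when the row is full, enlarge the storage by CHUNKSIZE block slots.  Both
   macros use the local variables set up in MatSetValues_MPISBAIJ_MatScalar().
*/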
#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt  new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (PetscInt*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
#ifndef MatSetValues_SeqSBAIJ_B_Private
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt  new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (PetscInt*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
PetscErrorCode MatSetValues_MPISBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
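/* For example, with bs=2 the entry (row,col) = (3,1) lies in block
   (brow,bcol) = (1,0); since brow > bcol it belongs to the lower triangle
   and is silently dropped. */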
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;

  /* Some variables required in the macros */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  /* for stash */
  PetscInt       n_loc, *in_loc=0;
  MatScalar      *v_loc=0;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }

  if (!baij->donotstash) {
    ierr = PetscFree(in_loc);CHKERRQ(ierr);
    ierr = PetscFree(v_loc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
  PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
  PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row oriented) or m == 1 (column oriented) a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
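/* HASH() is Fibonacci hashing: HASH_KEY is 1/phi (the inverse golden ratio),
   and the macro maps the fractional part of key*HASH_KEY onto [0,size).
   For example, HASH(100,7,tmp): tmp = 7*0.6180339887 = 4.3262...,
   giving (PetscInt)(100*0.3262...) = 32. */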
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      /* the entries of B are stored once but appear twice in the symmetric
         matrix, hence the factor 2 on the off-diagonal contribution */
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

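/*
   MatMult_MPISBAIJ computes y = A*x with a single forward scatter: the
   diagonal block product A_d*x goes into slvec1a, B^T*x (the contribution of
   the lower triangle) goes into slvec0b, and scattering slvec0 = [x, B^T*x]
   into slvec1 = [slvec1a, slvec1b] with ADD_VALUES both gathers the ghost
   values of x and accumulates the transpose contributions; the final
   B*slvec1b + slvec1a yields y.
*/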
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

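/* The matrix is symmetric, so A^T = A and the transpose products can simply
   forward to MatMult()/MatMultAdd(). */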
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPISBAIJ"
PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  PetscInt       *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = (PetscReal)matin->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->n;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1325     break;
1326   case MAT_COLUMN_ORIENTED:
1327     a->roworiented = PETSC_FALSE;
1328     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1329     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1330     break;
1331   case MAT_IGNORE_OFF_PROC_ENTRIES:
1332     a->donotstash = PETSC_TRUE;
1333     break;
1334   case MAT_NO_NEW_DIAGONALS:
1335     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1336   case MAT_USE_HASH_TABLE:
1337     a->ht_flag = PETSC_TRUE;
1338     break;
1339   case MAT_NOT_SYMMETRIC:
1340   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1341   case MAT_HERMITIAN:
1342     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1343   case MAT_SYMMETRIC:
1344   case MAT_STRUCTURALLY_SYMMETRIC:
1345   case MAT_NOT_HERMITIAN:
1346   case MAT_SYMMETRY_ETERNAL:
1347   case MAT_NOT_SYMMETRY_ETERNAL:
1348     break;
1349   default:
1350     SETERRQ(PETSC_ERR_SUP,"unknown option");
1351   }
1352   PetscFunctionReturn(0);
1353 }
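
/*
   Usage sketch (hypothetical guard macro as above): the option classes handled
   by MatSetOption_MPISBAIJ().  Structural options are forwarded to both the
   diagonal (a->A) and off-diagonal (a->B) blocks; options that would break
   symmetry raise PETSC_ERR_SUP.
*/
#if defined(PETSC_SBAIJ_USAGE_SKETCHES)
static PetscErrorCode SketchSetOptions(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr); /* sets a->donotstash */
  ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);          /* sets a->ht_flag    */
  /* MatSetOption(A,MAT_NOT_SYMMETRIC) would error: the format must stay symmetric */
  PetscFunctionReturn(0);
}
#endif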
1354 
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1357 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1358 {
1359   PetscErrorCode ierr;
1360   PetscFunctionBegin;
1361   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1367 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1368 {
1369   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1370   Mat            a = baij->A,b = baij->B;
1371   PetscErrorCode ierr;
1372   PetscInt       s1,s2,s3;
1373 
1374   PetscFunctionBegin;
1375   if (ll != rr) {
1376     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1377   }
1378   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1379   if (rr) {  /* ll == rr here, so this one branch handles both vectors */
1380     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1381     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1382     /* Overlap communication with computation. */
1383     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1384 
1385     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1386     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1387     /* left scale the off-diagonal block */
1388     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1389 
1390     /* scale the diagonal block */
1391     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1392 
1393     /* finish the scatter, then right scale the off-diagonal block */
1394     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1395     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1396   }
1397 
1398   PetscFunctionReturn(0);
1399 }
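
/*
   Usage sketch (hypothetical guard macro as above): symmetric scaling D*A*D.
   Because only the upper triangle is stored, MatDiagonalScale() on this format
   requires the left and right scaling vectors to be the identical Vec.
*/
#if defined(PETSC_SBAIJ_USAGE_SKETCHES)
static PetscErrorCode SketchSymmetricScale(Mat A,Vec d)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDiagonalScale(A,d,d);CHKERRQ(ierr); /* two distinct vectors would error out */
  PetscFunctionReturn(0);
}
#endif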
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1403 PetscErrorCode MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1404 {
1405   PetscFunctionBegin;
1406   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1411 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1412 {
1413   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1414   MPI_Comm          comm = A->comm;
1415   static PetscTruth called = PETSC_FALSE;
1416   PetscErrorCode    ierr;
1417 
1418   PetscFunctionBegin;
1419   if (!a->rank) {
1420     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1421   }
1422   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1423   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1424   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1430 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1431 {
1432   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1433   PetscErrorCode ierr;
1434 
1435   PetscFunctionBegin;
1436   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1441 
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatEqual_MPISBAIJ"
1444 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1445 {
1446   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1447   Mat            a,b,c,d;
1448   PetscTruth     flg;
1449   PetscErrorCode ierr;
1450 
1451   PetscFunctionBegin;
1452   a = matA->A; b = matA->B;
1453   c = matB->A; d = matB->B;
1454 
1455   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1456   if (flg == PETSC_TRUE) {
1457     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1458   }
1459   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1465 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1466 {
1467   PetscErrorCode ierr;
1468 
1469   PetscFunctionBegin;
1470   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1476 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1477 {
1478   PetscErrorCode ierr;
1479   PetscInt       i;
1480   PetscTruth     flg;
1481 
1482   PetscFunctionBegin;
1483   for (i=0; i<n; i++) {
1484     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1485     if (!flg) {
1486       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1487     }
1488   }
1489   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1490   PetscFunctionReturn(0);
1491 }
1492 
1493 
1494 /* -------------------------------------------------------------------*/
1495 static struct _MatOps MatOps_Values = {
1496        MatSetValues_MPISBAIJ,
1497        MatGetRow_MPISBAIJ,
1498        MatRestoreRow_MPISBAIJ,
1499        MatMult_MPISBAIJ,
1500 /* 4*/ MatMultAdd_MPISBAIJ,
1501        MatMultTranspose_MPISBAIJ,
1502        MatMultTransposeAdd_MPISBAIJ,
1503        0,
1504        0,
1505        0,
1506 /*10*/ 0,
1507        0,
1508        0,
1509        MatRelax_MPISBAIJ,
1510        MatTranspose_MPISBAIJ,
1511 /*15*/ MatGetInfo_MPISBAIJ,
1512        MatEqual_MPISBAIJ,
1513        MatGetDiagonal_MPISBAIJ,
1514        MatDiagonalScale_MPISBAIJ,
1515        MatNorm_MPISBAIJ,
1516 /*20*/ MatAssemblyBegin_MPISBAIJ,
1517        MatAssemblyEnd_MPISBAIJ,
1518        0,
1519        MatSetOption_MPISBAIJ,
1520        MatZeroEntries_MPISBAIJ,
1521 /*25*/ MatZeroRows_MPISBAIJ,
1522        0,
1523        0,
1524        0,
1525        0,
1526 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1527        0,
1528        0,
1529        0,
1530        0,
1531 /*35*/ MatDuplicate_MPISBAIJ,
1532        0,
1533        0,
1534        0,
1535        0,
1536 /*40*/ 0,
1537        MatGetSubMatrices_MPISBAIJ,
1538        MatIncreaseOverlap_MPISBAIJ,
1539        MatGetValues_MPISBAIJ,
1540        0,
1541 /*45*/ MatPrintHelp_MPISBAIJ,
1542        MatScale_MPISBAIJ,
1543        0,
1544        0,
1545        0,
1546 /*50*/ 0,
1547        0,
1548        0,
1549        0,
1550        0,
1551 /*55*/ 0,
1552        0,
1553        MatSetUnfactored_MPISBAIJ,
1554        0,
1555        MatSetValuesBlocked_MPISBAIJ,
1556 /*60*/ 0,
1557        0,
1558        0,
1559        MatGetPetscMaps_Petsc,
1560        0,
1561 /*65*/ 0,
1562        0,
1563        0,
1564        0,
1565        0,
1566 /*70*/ MatGetRowMax_MPISBAIJ,
1567        0,
1568        0,
1569        0,
1570        0,
1571 /*75*/ 0,
1572        0,
1573        0,
1574        0,
1575        0,
1576 /*80*/ 0,
1577        0,
1578        0,
1579        0,
1580        MatLoad_MPISBAIJ,
1581 /*85*/ 0,
1582        0,
1583        0,
1584        0,
1585        0,
1586 /*90*/ 0,
1587        0,
1588        0,
1589        0,
1590        0,
1591 /*95*/ 0,
1592        0,
1593        0,
1594        0};
1595 
1596 
1597 EXTERN_C_BEGIN
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1600 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1601 {
1602   PetscFunctionBegin;
1603   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1604   *iscopy = PETSC_FALSE;
1605   PetscFunctionReturn(0);
1606 }
1607 EXTERN_C_END
1608 
1609 EXTERN_C_BEGIN
1610 #undef __FUNCT__
1611 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1612 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1613 {
1614   Mat_MPISBAIJ   *b;
1615   PetscErrorCode ierr;
1616   PetscInt       i,mbs,Mbs;
1617 
1618   PetscFunctionBegin;
1619   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1620 
1621   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1622   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1623   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1624   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1625   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1626   if (d_nnz) {
1627     for (i=0; i<B->m/bs; i++) {
1628       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1629     }
1630   }
1631   if (o_nnz) {
1632     for (i=0; i<B->m/bs; i++) {
1633       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1634     }
1635   }
1636   B->preallocated = PETSC_TRUE;
1637   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1638   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1639   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1640   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1641 
1642   b   = (Mat_MPISBAIJ*)B->data;
1643   mbs = B->m/bs;
1644   Mbs = B->M/bs;
1645   if (mbs*bs != B->m) {
1646     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->m,bs);
1647   }
1648 
1649   B->bs  = bs;
1650   b->bs2 = bs*bs;
1651   b->mbs = mbs;
1652   b->nbs = mbs;
1653   b->Mbs = Mbs;
1654   b->Nbs = Mbs;
1655 
1656   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1657   b->rowners[0]    = 0;
1658   for (i=2; i<=b->size; i++) {
1659     b->rowners[i] += b->rowners[i-1];
1660   }
1661   b->rstart    = b->rowners[b->rank];
1662   b->rend      = b->rowners[b->rank+1];
1663   b->cstart    = b->rstart;
1664   b->cend      = b->rend;
1665   for (i=0; i<=b->size; i++) {
1666     b->rowners_bs[i] = b->rowners[i]*bs;
1667   }
1668   b->rstart_bs = b->rstart*bs;
1669   b->rend_bs   = b->rend*bs;
1670 
1671   b->cstart_bs = b->cstart*bs;
1672   b->cend_bs   = b->cend*bs;
1673 
1674   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1675   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1676   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1677   PetscLogObjectParent(B,b->A);
1678 
1679   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1680   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1681   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1682   PetscLogObjectParent(B,b->B);
1683 
1684   /* build cache for off-processor entries formed during assembly */
1685   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1686 
1687   PetscFunctionReturn(0);
1688 }
1689 EXTERN_C_END
1690 
1691 /*MC
1692    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1693    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1694 
1695    Options Database Keys:
1696 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1697 
1698   Level: beginner
1699 
1700 .seealso: MatCreateMPISBAIJ
1701 M*/
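
/*
   Usage sketch (hypothetical guard macro as above): letting the options database
   pick this type, so that running with -mat_type mpisbaij takes effect.
*/
#if defined(PETSC_SBAIJ_USAGE_SKETCHES)
static PetscErrorCode SketchCreateFromOptions(MPI_Comm comm,PetscInt m,PetscInt M,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,m,M,M,A);CHKERRQ(ierr); /* square: SBAIJ matrices are symmetric */
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);     /* honors -mat_type mpisbaij            */
  PetscFunctionReturn(0);
}
#endif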
1702 
1703 EXTERN_C_BEGIN
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatCreate_MPISBAIJ"
1706 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1707 {
1708   Mat_MPISBAIJ   *b;
1709   PetscErrorCode ierr;
1710   PetscTruth     flg;
1711 
1712   PetscFunctionBegin;
1713 
1714   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1715   B->data = (void*)b;
1716   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1717   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1718 
1719   B->ops->destroy    = MatDestroy_MPISBAIJ;
1720   B->ops->view       = MatView_MPISBAIJ;
1721   B->mapping    = 0;
1722   B->factor     = 0;
1723   B->assembled  = PETSC_FALSE;
1724 
1725   B->insertmode = NOT_SET_VALUES;
1726   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1727   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1728 
1729   /* build local table of row and column ownerships */
1730   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1731   b->cowners    = b->rowners + b->size + 2;
1732   b->rowners_bs = b->cowners + b->size + 2;
1733   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1734 
1735   /* build cache for off-processor entries formed during assembly */
1736   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1737   b->donotstash  = PETSC_FALSE;
1738   b->colmap      = PETSC_NULL;
1739   b->garray      = PETSC_NULL;
1740   b->roworiented = PETSC_TRUE;
1741 
1742 #if defined(PETSC_USE_MAT_SINGLE)
1743   /* stuff for MatSetValues_XXX in single precision */
1744   b->setvalueslen     = 0;
1745   b->setvaluescopy    = PETSC_NULL;
1746 #endif
1747 
1748   /* stuff used in block assembly */
1749   b->barray       = 0;
1750 
1751   /* stuff used for matrix vector multiply */
1752   b->lvec         = 0;
1753   b->Mvctx        = 0;
1754   b->slvec0       = 0;
1755   b->slvec0b      = 0;
1756   b->slvec1       = 0;
1757   b->slvec1a      = 0;
1758   b->slvec1b      = 0;
1759   b->sMvctx       = 0;
1760 
1761   /* stuff for MatGetRow() */
1762   b->rowindices   = 0;
1763   b->rowvalues    = 0;
1764   b->getrowactive = PETSC_FALSE;
1765 
1766   /* hash table stuff */
1767   b->ht           = 0;
1768   b->hd           = 0;
1769   b->ht_size      = 0;
1770   b->ht_flag      = PETSC_FALSE;
1771   b->ht_fact      = 0;
1772   b->ht_total_ct  = 0;
1773   b->ht_insert_ct = 0;
1774 
1775   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1776   if (flg) {
1777     PetscReal fact = 1.39;
1778     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1779     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1780     if (fact <= 1.0) fact = 1.39;
1781     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1782     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1783   }
1784   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1785                                      "MatStoreValues_MPISBAIJ",
1786                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1787   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1788                                      "MatRetrieveValues_MPISBAIJ",
1789                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1790   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1791                                      "MatGetDiagonalBlock_MPISBAIJ",
1792                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1793   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1794                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1795                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1796   B->symmetric                  = PETSC_TRUE;
1797   B->structurally_symmetric     = PETSC_TRUE;
1798   B->symmetric_set              = PETSC_TRUE;
1799   B->structurally_symmetric_set = PETSC_TRUE;
1800   PetscFunctionReturn(0);
1801 }
1802 EXTERN_C_END
1803 
1804 /*MC
1805    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1806 
1807    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1808    and MATMPISBAIJ otherwise.
1809 
1810    Options Database Keys:
1811 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1812 
1813   Level: beginner
1814 
1815 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1816 M*/
1817 
1818 EXTERN_C_BEGIN
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatCreate_SBAIJ"
1821 PetscErrorCode MatCreate_SBAIJ(Mat A)
1822 {
1823   PetscErrorCode ierr;
1824   PetscMPIInt    size;
1825 
1826   PetscFunctionBegin;
1827   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1828   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1829   if (size == 1) {
1830     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1831   } else {
1832     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1833   }
1834   PetscFunctionReturn(0);
1835 }
1836 EXTERN_C_END
1837 
1838 #undef __FUNCT__
1839 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1840 /*@C
1841    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1842    the user should preallocate the matrix storage by setting the parameters
1843    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1844    performance can be increased by more than a factor of 50.
1845 
1846    Collective on Mat
1847 
1848    Input Parameters:
1849 +  A - the matrix
1850 .  bs   - size of block
1851 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1852            submatrix  (same for all local rows)
1853 .  d_nnz - array containing the number of block nonzeros in the various block rows
1854            in the upper triangular and diagonal part of the diagonal portion of the
1855            local submatrix (possibly different for each block row) or PETSC_NULL.
1856            You must leave room for the diagonal entry even if it is zero.
1857 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1858            submatrix (same for all local rows).
1859 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1860            off-diagonal portion of the local submatrix (possibly different for
1861            each block row) or PETSC_NULL.
1862 
1863 
1864    Options Database Keys:
1865 +   -mat_no_unroll - uses code that does not unroll the loops in the
1866                      block calculations (much slower)
1867 -   -mat_block_size - size of the blocks to use
1868 
1869    Notes:
1870 
1871    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1872    then it must be used on all processors that share the object for that argument.
1873 
1874    Storage Information:
1875    For a square global matrix we define each processor's diagonal portion
1876    to be its local rows and the corresponding columns (a square submatrix);
1877    each processor's off-diagonal portion encompasses the remainder of the
1878    local matrix (a rectangular submatrix).
1879 
1880    The user can specify preallocated storage for the diagonal part of
1881    the local submatrix with either d_nz or d_nnz (not both).  Set
1882    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1883    memory allocation.  Likewise, specify preallocated storage for the
1884    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1885 
1886    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1887    the figure below we depict these three local rows and all columns (0-11).
1888 
1889 .vb
1890            0 1 2 3 4 5 6 7 8 9 10 11
1891           -------------------
1892    row 3  |  o o o d d d o o o o o o
1893    row 4  |  o o o d d d o o o o o o
1894    row 5  |  o o o d d d o o o o o o
1895           -------------------
1896 .ve
1897 
1898    Thus, any entries in the d locations are stored in the d (diagonal)
1899    submatrix, and any entries in the o locations are stored in the
1900    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1901    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1902 
1903    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1904    plus the diagonal part of the d matrix,
1905    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1906    In general, for PDE problems in which most nonzeros are near the diagonal,
1907    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1908    or you will get TERRIBLE performance; see the users' manual chapter on
1909    matrices.
1910 
1911    Level: intermediate
1912 
1913 .keywords: matrix, block, aij, compressed row, sparse, parallel
1914 
1915 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1916 @*/
1917 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1918 {
1919   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1920 
1921   PetscFunctionBegin;
1922   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1923   if (f) {
1924     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1925   }
1926   PetscFunctionReturn(0);
1927 }
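
/*
   Usage sketch (hypothetical guard macro as above): the type-then-preallocate
   sequence this routine supports.  The flat counts below are illustrative safe
   upper bounds for the 3-row figure in the manual page (bs = 1): at most 3
   upper-triangular-plus-diagonal entries per row land in d, the rest in o.
*/
#if defined(PETSC_SBAIJ_USAGE_SKETCHES)
static PetscErrorCode SketchPreallocate(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,3,3,PETSC_DETERMINE,PETSC_DETERMINE,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(*A,1,3,PETSC_NULL,9,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif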
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatCreateMPISBAIJ"
1931 /*@C
1932    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1933    (block compressed row).  For good matrix assembly performance
1934    the user should preallocate the matrix storage by setting the parameters
1935    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1936    performance can be increased by more than a factor of 50.
1937 
1938    Collective on MPI_Comm
1939 
1940    Input Parameters:
1941 +  comm - MPI communicator
1942 .  bs   - size of block
1943 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1944            This value should be the same as the local size used in creating the
1945            y vector for the matrix-vector product y = Ax.
1946 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1947            This value should be the same as the local size used in creating the
1948            x vector for the matrix-vector product y = Ax.
1949 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1950 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1951 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1952            submatrix  (same for all local rows)
1953 .  d_nnz - array containing the number of block nonzeros in the various block rows
1954            in the upper triangular and diagonal part of the diagonal portion of the
1955            local submatrix (possibly different for each block row) or PETSC_NULL.
1956            You must leave room for the diagonal entry even if it is zero.
1957 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1958            submatrix (same for all local rows).
1959 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1960            off-diagonal portion of the local submatrix (possibly different for
1961            each block row) or PETSC_NULL.
1962 
1963    Output Parameter:
1964 .  A - the matrix
1965 
1966    Options Database Keys:
1967 +   -mat_no_unroll - uses code that does not unroll the loops in the
1968                      block calculations (much slower)
1969 .   -mat_block_size - size of the blocks to use
1970 -   -mat_mpi - use the parallel matrix data structures even on one processor
1971                (defaults to using SeqSBAIJ format on one processor)
1972 
1973    Notes:
1974    The user MUST specify either the local or global matrix dimensions
1975    (possibly both).
1976 
1977    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1978    then it must be used on all processors that share the object for that argument.
1979 
1980    Storage Information:
1981    For a square global matrix we define each processor's diagonal portion
1982    to be its local rows and the corresponding columns (a square submatrix);
1983    each processor's off-diagonal portion encompasses the remainder of the
1984    local matrix (a rectangular submatrix).
1985 
1986    The user can specify preallocated storage for the diagonal part of
1987    the local submatrix with either d_nz or d_nnz (not both).  Set
1988    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1989    memory allocation.  Likewise, specify preallocated storage for the
1990    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1991 
1992    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1993    the figure below we depict these three local rows and all columns (0-11).
1994 
1995 .vb
1996            0 1 2 3 4 5 6 7 8 9 10 11
1997           -------------------
1998    row 3  |  o o o d d d o o o o o o
1999    row 4  |  o o o d d d o o o o o o
2000    row 5  |  o o o d d d o o o o o o
2001           -------------------
2002 .ve
2003 
2004    Thus, any entries in the d locations are stored in the d (diagonal)
2005    submatrix, and any entries in the o locations are stored in the
2006    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2007    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2008 
2009    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2010    plus the diagonal part of the d matrix,
2011    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2012    In general, for PDE problems in which most nonzeros are near the diagonal,
2013    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2014    or you will get TERRIBLE performance; see the users' manual chapter on
2015    matrices.
2016 
2017    Level: intermediate
2018 
2019 .keywords: matrix, block, aij, compressed row, sparse, parallel
2020 
2021 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2022 @*/
2023 
2024 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2025 {
2026   PetscErrorCode ierr;
2027   PetscMPIInt    size;
2028 
2029   PetscFunctionBegin;
2030   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2031   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2032   if (size > 1) {
2033     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2034     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2035   } else {
2036     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2037     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2038   }
2039   PetscFunctionReturn(0);
2040 }
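
/*
   Usage sketch (hypothetical guard macro as above): create-and-assemble for a
   global 20x20 tridiagonal matrix with bs = 1.  Only entries on or above the
   diagonal are set; anything destined for another process is stashed and
   exchanged during assembly.
*/
#if defined(PETSC_SBAIJ_USAGE_SKETCHES)
static PetscErrorCode SketchAssemble(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend;
  PetscScalar    diag = 4.0,off = -1.0;

  PetscFunctionBegin;
  ierr = MatCreateMPISBAIJ(comm,1,PETSC_DECIDE,PETSC_DECIDE,20,20,2,PETSC_NULL,1,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatSetValues(*A,1,&i,1,&i,&diag,INSERT_VALUES);CHKERRQ(ierr);
    if (i < 19) { /* upper superdiagonal only; the lower triangle is implicit */
      j    = i + 1;
      ierr = MatSetValues(*A,1,&i,1,&j,&off,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif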
2041 
2042 
2043 #undef __FUNCT__
2044 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2045 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2046 {
2047   Mat            mat;
2048   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2049   PetscErrorCode ierr;
2050   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
2051   PetscScalar    *array;
2052 
2053   PetscFunctionBegin;
2054   *newmat       = 0;
2055   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2056   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2057   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2058 
2059   mat->factor       = matin->factor;
2060   mat->preallocated = PETSC_TRUE;
2061   mat->assembled    = PETSC_TRUE;
2062   mat->insertmode   = NOT_SET_VALUES;
2063 
2064   a = (Mat_MPISBAIJ*)mat->data;
2065   mat->bs  = matin->bs;
2066   a->bs2   = oldmat->bs2;
2067   a->mbs   = oldmat->mbs;
2068   a->nbs   = oldmat->nbs;
2069   a->Mbs   = oldmat->Mbs;
2070   a->Nbs   = oldmat->Nbs;
2071 
2072   a->rstart       = oldmat->rstart;
2073   a->rend         = oldmat->rend;
2074   a->cstart       = oldmat->cstart;
2075   a->cend         = oldmat->cend;
2076   a->size         = oldmat->size;
2077   a->rank         = oldmat->rank;
2078   a->donotstash   = oldmat->donotstash;
2079   a->roworiented  = oldmat->roworiented;
2080   a->rowindices   = 0;
2081   a->rowvalues    = 0;
2082   a->getrowactive = PETSC_FALSE;
2083   a->barray       = 0;
2084   a->rstart_bs    = oldmat->rstart_bs;
2085   a->rend_bs      = oldmat->rend_bs;
2086   a->cstart_bs    = oldmat->cstart_bs;
2087   a->cend_bs      = oldmat->cend_bs;
2088 
2089   /* hash table stuff */
2090   a->ht           = 0;
2091   a->hd           = 0;
2092   a->ht_size      = 0;
2093   a->ht_flag      = oldmat->ht_flag;
2094   a->ht_fact      = oldmat->ht_fact;
2095   a->ht_total_ct  = 0;
2096   a->ht_insert_ct = 0;
2097 
2098   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2099   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2100   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2101   if (oldmat->colmap) {
2102 #if defined (PETSC_USE_CTABLE)
2103     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2104 #else
2105     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2106     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
2107     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2108 #endif
2109   } else a->colmap = 0;
2110 
2111   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2112     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2113     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2114     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2115   } else a->garray = 0;
2116 
2117   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2118   PetscLogObjectParent(mat,a->lvec);
2119   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2120   PetscLogObjectParent(mat,a->Mvctx);
2121 
2122   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2123   PetscLogObjectParent(mat,a->slvec0);
2124   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2125   PetscLogObjectParent(mat,a->slvec1);
2126 
2127   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2128   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2129   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2130   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2131   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2132   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2133   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2134   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2135   PetscLogObjectParent(mat,a->slvec0);
2136   PetscLogObjectParent(mat,a->slvec1);
2137   PetscLogObjectParent(mat,a->slvec0b);
2138   PetscLogObjectParent(mat,a->slvec1a);
2139   PetscLogObjectParent(mat,a->slvec1b);
2140 
2141   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2142   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2143   a->sMvctx = oldmat->sMvctx;
2144   PetscLogObjectParent(mat,a->sMvctx);
2145 
2146   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2147   PetscLogObjectParent(mat,a->A);
2148   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2149   PetscLogObjectParent(mat,a->B);
2150   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2151   *newmat = mat;
2152   PetscFunctionReturn(0);
2153 }
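
/*
   Usage sketch (hypothetical guard macro as above): MatDuplicate() is the public
   entry that lands in the routine above for MPISBAIJ matrices.
*/
#if defined(PETSC_SBAIJ_USAGE_SKETCHES)
static PetscErrorCode SketchDuplicate(Mat A)
{
  Mat            B;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); /* MAT_DO_NOT_COPY_VALUES keeps only the nonzero pattern */
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif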
2154 
2155 #include "petscsys.h"
2156 
2157 #undef __FUNCT__
2158 #define __FUNCT__ "MatLoad_MPISBAIJ"
2159 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2160 {
2161   Mat            A;
2162   PetscErrorCode ierr;
2163   PetscInt       i,nz,j,rstart,rend;
2164   PetscScalar    *vals,*buf;
2165   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2166   MPI_Status     status;
2167   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
2168   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2169   PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
2170   PetscInt       bs=1,Mbs,mbs,extra_rows;
2171   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2172   PetscInt       dcount,kmax,k,nzcount,tmp;
2173   int            fd;
2174 
2175   PetscFunctionBegin;
2176   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2177 
2178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2179   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2180   if (!rank) {
2181     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2182     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2183     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2184     if (header[3] < 0) {
2185       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2186     }
2187   }
2188 
2189   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2190   M = header[1]; N = header[2];
2191 
2192   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2193 
2194   /*
2195      This code adds extra rows to make sure the number of rows is
2196      divisible by the blocksize
2197   */
2198   Mbs        = M/bs;
2199   extra_rows = bs - M + bs*(Mbs);
2200   if (extra_rows == bs) extra_rows = 0;
2201   else                  Mbs++;
2202   if (extra_rows && !rank) {
2203     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2204   }
2205 
2206   /* determine ownership of all rows */
2207   mbs        = Mbs/size + ((Mbs % size) > rank);
2208   m          = mbs*bs;
2209   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2210   browners   = rowners + size + 1;
2211   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2212   rowners[0] = 0;
2213   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2214   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2215   rstart = rowners[rank];
2216   rend   = rowners[rank+1];
2217 
2218   /* distribute row lengths to all processors */
2219   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2220   if (!rank) {
2221     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2222     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2223     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2224     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2225     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2226     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2227     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2228   } else {
2229     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2230   }
2231 
2232   if (!rank) {   /* procs[0] */
2233     /* calculate the number of nonzeros on each processor */
2234     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2235     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2236     for (i=0; i<size; i++) {
2237       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2238         procsnz[i] += rowlengths[j];
2239       }
2240     }
2241     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2242 
2243     /* determine max buffer needed and allocate it */
2244     maxnz = 0;
2245     for (i=0; i<size; i++) {
2246       maxnz = PetscMax(maxnz,procsnz[i]);
2247     }
2248     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2249 
2250     /* read in my part of the matrix column indices  */
2251     nz     = procsnz[0];
2252     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2253     mycols = ibuf;
2254     if (size == 1)  nz -= extra_rows;
2255     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2256     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2257 
2258     /* read in the column indices for the other processors (except the last) and ship them off */
2259     for (i=1; i<size-1; i++) {
2260       nz   = procsnz[i];
2261       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2262       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2263     }
2264     /* read in the stuff for the last proc */
2265     if (size != 1) {
2266       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2267       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2268       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2269       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2270     }
2271     ierr = PetscFree(cols);CHKERRQ(ierr);
2272   } else {  /* procs[i], i>0 */
2273     /* determine buffer space needed for message */
2274     nz = 0;
2275     for (i=0; i<m; i++) {
2276       nz += locrowlens[i];
2277     }
2278     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2279     mycols = ibuf;
2280     /* receive message of column indices*/
2281     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2282     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2283     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2284   }
2285 
2286   /* loop over local rows, determining number of off diagonal entries */
2287   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2288   odlens   = dlens + (rend-rstart);
2289   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2290   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2291   masked1  = mask    + Mbs;
2292   masked2  = masked1 + Mbs;
2293   rowcount = 0; nzcount = 0;
2294   for (i=0; i<mbs; i++) {
2295     dcount  = 0;
2296     odcount = 0;
2297     for (j=0; j<bs; j++) {
2298       kmax = locrowlens[rowcount];
2299       for (k=0; k<kmax; k++) {
2300         tmp = mycols[nzcount++]/bs; /* block col. index */
2301         if (!mask[tmp]) {
2302           mask[tmp] = 1;
2303           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2304           else masked1[dcount++] = tmp; /* entry in diag portion */
2305         }
2306       }
2307       rowcount++;
2308     }
2309 
2310     dlens[i]  = dcount;  /* d_nzz[i] */
2311     odlens[i] = odcount; /* o_nzz[i] */
2312 
2313     /* zero out the mask elements we set */
2314     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2315     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2316   }
2317 
2318   /* create our matrix */
2319   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2320   ierr = MatSetType(A,type);CHKERRQ(ierr);
2321   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2322   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2323 
2324   if (!rank) {
2325     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2326     /* read in my part of the matrix numerical values  */
2327     nz = procsnz[0];
2328     vals = buf;
2329     mycols = ibuf;
2330     if (size == 1)  nz -= extra_rows;
2331     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2332     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2333 
2334     /* insert into matrix */
2335     jj      = rstart*bs;
2336     for (i=0; i<m; i++) {
2337       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2338       mycols += locrowlens[i];
2339       vals   += locrowlens[i];
2340       jj++;
2341     }
2342 
2343     /* read in the values for the other processors (except the last one) and ship them out */
2344     for (i=1; i<size-1; i++) {
2345       nz   = procsnz[i];
2346       vals = buf;
2347       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2348       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2349     }
2350     /* the last proc */
2351     if (size != 1) {
2352       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2353       vals = buf;
2354       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2355       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2356       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2357     }
2358     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2359 
2360   } else {
2361     /* receive numeric values */
2362     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2363 
2364     /* receive message of values*/
2365     vals   = buf;
2366     mycols = ibuf;
2367     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2368     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2369     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2370 
2371     /* insert into matrix */
2372     jj      = rstart*bs;
2373     for (i=0; i<m; i++) {
2374       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2375       mycols += locrowlens[i];
2376       vals   += locrowlens[i];
2377       jj++;
2378     }
2379   }
2380 
2381   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2382   ierr = PetscFree(buf);CHKERRQ(ierr);
2383   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2384   ierr = PetscFree(rowners);CHKERRQ(ierr);
2385   ierr = PetscFree(dlens);CHKERRQ(ierr);
2386   ierr = PetscFree(mask);CHKERRQ(ierr);
2387   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2389   *newmat = A;
2390   PetscFunctionReturn(0);
2391 }
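
/*
   Usage sketch (hypothetical guard macro as above): driving the loader above
   from a binary viewer.  The file name is a placeholder, and the read-only mode
   constant for PetscViewerBinaryOpen() is an assumption -- its spelling varies
   across PETSc 2.x releases.
*/
#if defined(PETSC_SBAIJ_USAGE_SKETCHES)
static PetscErrorCode SketchLoad(MPI_Comm comm,Mat *A)
{
  PetscViewer    viewer;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* run with -matload_block_size <bs> to control the block size used above */
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",PETSC_FILE_RDONLY,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPISBAIJ,A);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif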
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2395 /*@
2396    MatMPISBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
2397 
2398    Input Parameters:
2399 +  mat  - the matrix
2400 -  fact - factor
2401 
2402    Collective on Mat
2403 
2404    Level: advanced
2405 
2406   Notes:
2407    This can also be set by the command line option: -mat_use_hash_table fact
2408 
2409 .keywords: matrix, hashtable, factor, HT
2410 
2411 .seealso: MatSetOption()
2412 @*/
2413 PetscErrorCode MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2414 {
2415   PetscFunctionBegin;
2416   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
2417   /* PetscFunctionReturn(0); */
2418 }
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2422 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2423 {
2424   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2425   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2426   PetscReal      atmp;
2427   PetscReal      *work,*svalues,*rvalues;
2428   PetscErrorCode ierr;
2429   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2430   PetscMPIInt    rank,size;
2431   PetscInt       *rowners_bs,dest,count,source;
2432   PetscScalar    *va;
2433   MatScalar      *ba;
2434   MPI_Status     stat;
2435 
2436   PetscFunctionBegin;
2437   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2438   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2439 
2440   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2441   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2442 
2443   bs   = A->bs;
2444   mbs  = a->mbs;
2445   Mbs  = a->Mbs;
2446   ba   = b->a;
2447   bi   = b->i;
2448   bj   = b->j;
2449 
2450   /* find ownerships */
2451   rowners_bs = a->rowners_bs;
2452 
2453   /* each proc creates an array to be distributed */
2454   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2455   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2456 
2457   /* row_max for B */
2458   if (rank != size-1){
2459     for (i=0; i<mbs; i++) {
2460       ncols = bi[1] - bi[0]; bi++;
2461       brow  = bs*i;
2462       for (j=0; j<ncols; j++){
2463         bcol = bs*(*bj);
2464         for (kcol=0; kcol<bs; kcol++){
2465           col = bcol + kcol;                 /* local col index */
2466           col += rowners_bs[rank+1];      /* global col index */
2467           for (krow=0; krow<bs; krow++){
2468             atmp = PetscAbsScalar(*ba); ba++;
2469             row = brow + krow;    /* local row index */
2470             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2471             if (work[col] < atmp) work[col] = atmp;
2472           }
2473         }
2474         bj++;
2475       }
2476     }
2477 
2478     /* send values to its owners */
2479     for (dest=rank+1; dest<size; dest++){
2480       svalues = work + rowners_bs[dest];
2481       count   = rowners_bs[dest+1]-rowners_bs[dest];
2482       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2483     }
2484   }
2485 
2486   /* receive values */
2487   if (rank){
2488     rvalues = work;
2489     count   = rowners_bs[rank+1]-rowners_bs[rank];
2490     for (source=0; source<rank; source++){
2491       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2492       /* process values */
2493       for (i=0; i<count; i++){
2494         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2495       }
2496     }
2497   }
2498 
2499   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2500   ierr = PetscFree(work);CHKERRQ(ierr);
2501   PetscFunctionReturn(0);
2502 }
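
/*
   Usage sketch (hypothetical guard macro as above): per-row maxima.  The routine
   above must communicate because a row's entries left of the diagonal live in
   the transposed (upper) triangle owned by other processes.
*/
#if defined(PETSC_SBAIJ_USAGE_SKETCHES)
static PetscErrorCode SketchRowMax(Mat A)
{
  Vec            v;
  PetscInt       m,n;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = VecCreateMPI(A->comm,m,PETSC_DETERMINE,&v);CHKERRQ(ierr);
  ierr = MatGetRowMax(A,v);CHKERRQ(ierr);
  ierr = VecDestroy(v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif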
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatRelax_MPISBAIJ"
2506 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2507 {
2508   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2509   PetscErrorCode ierr;
2510   PetscInt       mbs=mat->mbs,bs=matin->bs;
2511   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2512   Vec            bb1;
2513 
2514   PetscFunctionBegin;
2515   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2516   if (bs > 1)
2517     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2518 
2519   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2520     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2521       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2522       its--;
2523     }
2524 
2525     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2526     while (its--){
2527 
2528       /* lower triangular part: slvec0b = - B^T*xx */
2529       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2530 
2531       /* copy xx into slvec0a */
2532       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2533       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2534       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2535       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2536 
2537       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2538 
2539       /* copy bb into slvec1a */
2540       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2541       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2542       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2543       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2544 
2545       /* set slvec1b = 0 */
2546       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2547 
2548       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2549       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2550       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2551       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2552 
2553       /* upper triangular part: bb1 = slvec1a + B*slvec1b = bb - B*x */
2554       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2555 
2556       /* local diagonal sweep */
2557       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2558     }
2559     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2560   } else {
2561     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2562   }
2563   PetscFunctionReturn(0);
2564 }
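
/*
   Usage sketch (hypothetical guard macro as above): exercising the SSOR kernel.
   MatRelax() is assumed here to mirror the ops->relax calling sequence
   (b, omega, flag, shift, its, lits, x); treat this as a sketch, not a
   definitive driver.  The sweep must include SOR_LOCAL_SYMMETRIC_SWEEP and the
   block size must be 1, or the routine above errors out.
*/
#if defined(PETSC_SBAIJ_USAGE_SKETCHES)
static PetscErrorCode SketchSSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRelax(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS,0.0,1,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif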
2565 
2566 #undef __FUNCT__
2567 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2568 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2569 {
2570   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2571   PetscErrorCode ierr;
2572   PetscScalar    mone=-1.0;
2573   Vec            lvec1,bb1;
2574 
2575   PetscFunctionBegin;
2576   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2577   if (matin->bs > 1)
2578     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2579 
2580   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2581     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2582       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2583       its--;
2584     }
2585 
2586     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2587     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2588     while (its--){
2589       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2590 
2591       /* lower triangular part: bb1 = bb - B^T*xx */
2592       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2593       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2594 
2595       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2596       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2597       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2598 
2599       /* upper triangular part: bb1 = bb1 - B*x */
2600       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2601       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2602 
2603       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2604 
2605       /* diagonal sweep */
2606       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2607     }
2608     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2609     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2610   } else {
2611     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2612   }
2613   PetscFunctionReturn(0);
2614 }
2615 
2616