
#include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries to single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
#endif
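
/*
   A minimal sketch of the conversion pattern the _MatScalar wrappers below
   implement (illustrative only; the real wrappers also cache the scratch
   buffer in the matrix so it is reused across calls):

     const PetscScalar v[];       values as supplied by the caller
     MatScalar         *vsingle;  scratch copy in the matrix precision
     for (i=0; i<N; i++) vsingle[i] = v[i];              demote each entry
     ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
*/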

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END


#define CHUNKSIZE  10

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in the row, therefore enlarge */ \
        PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (PetscInt*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
#ifndef MatSetValues_SeqSBAIJ_B_Private
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in the row, therefore enlarge */ \
        PetscInt       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (PetscInt*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif
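
/*
   How the insertion macros above work, in brief: for scalar entry (row,col)
   the block row is brow = row/bs and the block column is bcol = col/bs.
   A binary search narrows the sorted block-column list of row brow to a
   window of width <= 3, which is then scanned linearly.  If the block
   exists, the scalar lands at offset bs2*_i + bs*cidx + ridx (column-major
   within the bs x bs block); if not, the row arrays are grown by CHUNKSIZE
   blocks in a single malloc and the entry is inserted.

   Worked example (illustrative): bs = 2, row = 5, col = 3 gives brow = 2,
   bcol = 1, ridx = 1, cidx = 1, so the scalar is stored at
   ap[4*_i + 2*1 + 1] once _i indexes block column 1 of block row 2.
*/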

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
PetscErrorCode MatSetValues_MPISBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
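
/*
   Illustrative consequence for callers (hypothetical indices):

     i = 3; j = 1; value = 5.0;
     MatSetValues(mat,1,&i,1,&j,&value,INSERT_VALUES);  dropped: block below diagonal
     MatSetValues(mat,1,&j,1,&i,&value,INSERT_VALUES);  stored as a(1,3)
*/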
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;

  /* Some variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar     *ba=b->a;

  PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar     *ap,*bap;

  /* for stash */
  PetscInt      n_loc, *in_loc=0;
  MatScalar     *v_loc=0;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor's entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off-processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }

  if (!baij->donotstash) {
    ierr = PetscFree(in_loc);CHKERRQ(ierr);
    ierr = PetscFree(v_loc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
  PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
  PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
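
/*
   The hash function above is multiplicative hashing with the golden-ratio
   conjugate: tmp = key*0.6180339887, and the slot is the fractional part
   of tmp scaled by the table size.  Worked example (illustrative):
   size = 100, key = 7 gives tmp = 4.3262..., fractional part 0.3262...,
   so HASH(100,7,tmp) = 32.
*/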
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
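
/*
   Note on the Frobenius norm computed below: each process stores its
   diagonal block (A, in symmetric SBAIJ format) plus the strictly
   off-diagonal upper blocks (B).  The mirror images of the B entries are
   not stored on any process, so their contribution is accounted for by
   doubling:

       ||M||_F^2 = sum_p ||A_p||_F^2 + 2 * sum_p ||B_p||_F^2
*/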

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;             /* square of norm(A) */
      ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table and fills it in.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
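
/*
   The MPI_BOR reduction above is a compact consistency check: InsertMode
   is an enum (INSERT_VALUES and ADD_VALUES have distinct bit patterns in
   this PETSc version), so if any two ranks disagree the bitwise OR equals
   ADD_VALUES|INSERT_VALUES, a value no single rank could have set.
*/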

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,n,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
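
/*
   MatMult for the symmetric format needs both B*x_ghost (the stored upper
   off-diagonal blocks) and B^T*x_local (their unstored mirror images), yet
   uses only a single forward scatter:

     slvec1a = A*x_local;  slvec1b = 0
     slvec0  = [ x_local ; B^T*x_local ]
     scatter slvec0 -> slvec1 with ADD_VALUES
       (x values travel to the ranks that need them as ghosts and land in
        slvec1b; B^T contributions travel back to their owning ranks and
        are added into slvec1a)
     y = B*slvec1b + slvec1a
*/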

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
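
/* The matrix is symmetric (A^T == A), so the transpose products are just
   MatMult()/MatMultAdd(). */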
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1086 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1087 {
1088   PetscErrorCode ierr;
1089 
1090   PetscFunctionBegin;
1091   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1097 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1098 {
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 /*
1107   This only works correctly for square matrices where the subblock A->A is the
1108    diagonal block
1109 */
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1112 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1113 {
1114   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1115   PetscErrorCode ierr;
1116 
1117   PetscFunctionBegin;
1118   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1119   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatScale_MPISBAIJ"
1125 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1126 {
1127   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1128   PetscErrorCode ierr;
1129 
1130   PetscFunctionBegin;
1131   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1132   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1138 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1139 {
1140   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1141   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1142   PetscErrorCode ierr;
1143   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1144   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1145   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1146 
1147   PetscFunctionBegin;
1148   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1149   mat->getrowactive = PETSC_TRUE;
1150 
1151   if (!mat->rowvalues && (idx || v)) {
1152     /*
1153         allocate enough space to hold information from the longest row.
1154     */
1155     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1156     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1157     PetscInt     max = 1,mbs = mat->mbs,tmp;
1158     for (i=0; i<mbs; i++) {
1159       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1160       if (max < tmp) { max = tmp; }
1161     }
1162     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1163     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1164   }
1165 
1166   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1167   lrow = row - brstart;  /* local row index */
1168 
1169   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1170   if (!v)   {pvA = 0; pvB = 0;}
1171   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1172   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1173   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1174   nztot = nzA + nzB;
1175 
1176   cmap  = mat->garray;
1177   if (v  || idx) {
1178     if (nztot) {
1179       /* Sort by increasing column numbers, assuming A and B already sorted */
1180       PetscInt imark = -1;
1181       if (v) {
1182         *v = v_p = mat->rowvalues;
1183         for (i=0; i<nzB; i++) {
1184           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1185           else break;
1186         }
1187         imark = i;
1188         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1189         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1190       }
1191       if (idx) {
1192         *idx = idx_p = mat->rowindices;
1193         if (imark > -1) {
1194           for (i=0; i<imark; i++) {
1195             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1196           }
1197         } else {
1198           for (i=0; i<nzB; i++) {
1199             if (cmap[cworkB[i]/bs] < cstart)
1200               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1201             else break;
1202           }
1203           imark = i;
1204         }
1205         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1206         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1207       }
1208     } else {
1209       if (idx) *idx = 0;
1210       if (v)   *v   = 0;
1211     }
1212   }
1213   *nz = nztot;
1214   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1215   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1221 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1222 {
1223   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1224 
1225   PetscFunctionBegin;
1226   if (baij->getrowactive == PETSC_FALSE) {
1227     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1228   }
1229   baij->getrowactive = PETSC_FALSE;
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1235 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1236 {
1237   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1238   PetscErrorCode ierr;
1239 
1240   PetscFunctionBegin;
1241   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1242   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1248 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1249 {
1250   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1251   Mat            A = a->A,B = a->B;
1252   PetscErrorCode ierr;
1253   PetscReal      isend[5],irecv[5];
1254 
1255   PetscFunctionBegin;
1256   info->block_size     = (PetscReal)matin->bs;
1257   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1258   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1259   isend[3] = info->memory;  isend[4] = info->mallocs;
1260   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1261   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1262   isend[3] += info->memory;  isend[4] += info->mallocs;
1263   if (flag == MAT_LOCAL) {
1264     info->nz_used      = isend[0];
1265     info->nz_allocated = isend[1];
1266     info->nz_unneeded  = isend[2];
1267     info->memory       = isend[3];
1268     info->mallocs      = isend[4];
1269   } else if (flag == MAT_GLOBAL_MAX) {
1270     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1271     info->nz_used      = irecv[0];
1272     info->nz_allocated = irecv[1];
1273     info->nz_unneeded  = irecv[2];
1274     info->memory       = irecv[3];
1275     info->mallocs      = irecv[4];
1276   } else if (flag == MAT_GLOBAL_SUM) {
1277     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1278     info->nz_used      = irecv[0];
1279     info->nz_allocated = irecv[1];
1280     info->nz_unneeded  = irecv[2];
1281     info->memory       = irecv[3];
1282     info->mallocs      = irecv[4];
1283   } else {
1284     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1285   }
1286   info->rows_global       = (PetscReal)A->M;
1287   info->columns_global    = (PetscReal)A->N;
1288   info->rows_local        = (PetscReal)A->m;
1289   info->columns_local     = (PetscReal)A->N;
1290   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1291   info->fill_ratio_needed = 0;
1292   info->factor_mallocs    = 0;
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1298 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1299 {
1300   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1301   PetscErrorCode ierr;
1302 
1303   PetscFunctionBegin;
1304   switch (op) {
1305   case MAT_NO_NEW_NONZERO_LOCATIONS:
1306   case MAT_YES_NEW_NONZERO_LOCATIONS:
1307   case MAT_COLUMNS_UNSORTED:
1308   case MAT_COLUMNS_SORTED:
1309   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1310   case MAT_KEEP_ZEROED_ROWS:
1311   case MAT_NEW_NONZERO_LOCATION_ERR:
1312     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1313     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1314     break;
1315   case MAT_ROW_ORIENTED:
1316     a->roworiented = PETSC_TRUE;
1317     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1318     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1319     break;
1320   case MAT_ROWS_SORTED:
1321   case MAT_ROWS_UNSORTED:
1322   case MAT_YES_NEW_DIAGONALS:
1323     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1324     break;
1325   case MAT_COLUMN_ORIENTED:
1326     a->roworiented = PETSC_FALSE;
1327     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1328     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1329     break;
1330   case MAT_IGNORE_OFF_PROC_ENTRIES:
1331     a->donotstash = PETSC_TRUE;
1332     break;
1333   case MAT_NO_NEW_DIAGONALS:
1334     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1335   case MAT_USE_HASH_TABLE:
1336     a->ht_flag = PETSC_TRUE;
1337     break;
1338   case MAT_NOT_SYMMETRIC:
1339   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1340   case MAT_HERMITIAN:
1341     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1342   case MAT_SYMMETRIC:
1343   case MAT_STRUCTURALLY_SYMMETRIC:
1344   case MAT_NOT_HERMITIAN:
1345   case MAT_SYMMETRY_ETERNAL:
1346   case MAT_NOT_SYMMETRY_ETERNAL:
1347     break;
1348   default:
1349     SETERRQ(PETSC_ERR_SUP,"unknown option");
1350   }
1351   PetscFunctionReturn(0);
1352 }
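
/*
   Usage sketch: structural options are forwarded to both local blocks, while
   options that would contradict symmetry (e.g. MAT_NOT_SYMMETRIC) error out
   above. For example:

     ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
*/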
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1356 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1357 {
1358   PetscErrorCode ierr;
1359   PetscFunctionBegin;
1360   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1366 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1367 {
1368   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1369   Mat            a = baij->A,b = baij->B;
1370   PetscErrorCode ierr;
1371   PetscInt       s1,s2,s3;
1372 
1373   PetscFunctionBegin;
1374   if (ll != rr) {
1375     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1376   }
1377   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1378   if (rr) {
1379     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1380     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1381     /* Overlap communication with computation: start gathering off-processor entries of rr */
1382     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1383 
1384     /* left scale the off-diagonal block (ll == rr was verified above) */
1385     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1386     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1387     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1388 
1389     /* scale the diagonal block */
1390     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1391 
1392     /* finish the scatter, then right scale the off-diagonal block */
1393     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1394     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1395   }
1396 
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1402 PetscErrorCode MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1403 {
1404   PetscFunctionBegin;
1405   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1410 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1411 {
1412   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1413   MPI_Comm          comm = A->comm;
1414   static PetscTruth called = PETSC_FALSE;
1415   PetscErrorCode    ierr;
1416 
1417   PetscFunctionBegin;
1418   if (!a->rank) {
1419     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1420   }
1421   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1422   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1423   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1429 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1430 {
1431   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1432   PetscErrorCode ierr;
1433 
1434   PetscFunctionBegin;
1435   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatEqual_MPISBAIJ"
1443 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1444 {
1445   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1446   Mat            a,b,c,d;
1447   PetscTruth     flg;
1448   PetscErrorCode ierr;
1449 
1450   PetscFunctionBegin;
1451   a = matA->A; b = matA->B;
1452   c = matB->A; d = matB->B;
1453 
1454   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1455   if (flg == PETSC_TRUE) {
1456     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1457   }
1458   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1459   PetscFunctionReturn(0);
1460 }
1461 
1462 #undef __FUNCT__
1463 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1464 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1465 {
1466   PetscErrorCode ierr;
1467 
1468   PetscFunctionBegin;
1469   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1475 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1476 {
1477   PetscErrorCode ierr;
1478   PetscInt       i;
1479   PetscTruth     flg;
1480 
1481   PetscFunctionBegin;
1482   for (i=0; i<n; i++) {
1483     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1484     if (!flg) {
1485       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1486     }
1487   }
1488   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1489   PetscFunctionReturn(0);
1490 }
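
/*
   Usage sketch (nloc and first are hypothetical sizes; for this format each
   irow[i] must equal the matching icol[i], as the ISEqual() test above
   enforces):

     IS  is;
     Mat *submat;
     ierr = ISCreateStride(PETSC_COMM_SELF,nloc,first,1,&is);CHKERRQ(ierr);
     ierr = MatGetSubMatrices(A,1,&is,&is,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
*/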
1491 
1492 
1493 /* -------------------------------------------------------------------*/
1494 static struct _MatOps MatOps_Values = {
1495        MatSetValues_MPISBAIJ,
1496        MatGetRow_MPISBAIJ,
1497        MatRestoreRow_MPISBAIJ,
1498        MatMult_MPISBAIJ,
1499 /* 4*/ MatMultAdd_MPISBAIJ,
1500        MatMultTranspose_MPISBAIJ,
1501        MatMultTransposeAdd_MPISBAIJ,
1502        0,
1503        0,
1504        0,
1505 /*10*/ 0,
1506        0,
1507        0,
1508        MatRelax_MPISBAIJ,
1509        MatTranspose_MPISBAIJ,
1510 /*15*/ MatGetInfo_MPISBAIJ,
1511        MatEqual_MPISBAIJ,
1512        MatGetDiagonal_MPISBAIJ,
1513        MatDiagonalScale_MPISBAIJ,
1514        MatNorm_MPISBAIJ,
1515 /*20*/ MatAssemblyBegin_MPISBAIJ,
1516        MatAssemblyEnd_MPISBAIJ,
1517        0,
1518        MatSetOption_MPISBAIJ,
1519        MatZeroEntries_MPISBAIJ,
1520 /*25*/ MatZeroRows_MPISBAIJ,
1521        0,
1522        0,
1523        0,
1524        0,
1525 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*35*/ MatDuplicate_MPISBAIJ,
1531        0,
1532        0,
1533        0,
1534        0,
1535 /*40*/ 0,
1536        MatGetSubMatrices_MPISBAIJ,
1537        MatIncreaseOverlap_MPISBAIJ,
1538        MatGetValues_MPISBAIJ,
1539        0,
1540 /*45*/ MatPrintHelp_MPISBAIJ,
1541        MatScale_MPISBAIJ,
1542        0,
1543        0,
1544        0,
1545 /*50*/ 0,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*55*/ 0,
1551        0,
1552        MatSetUnfactored_MPISBAIJ,
1553        0,
1554        MatSetValuesBlocked_MPISBAIJ,
1555 /*60*/ 0,
1556        0,
1557        0,
1558        MatGetPetscMaps_Petsc,
1559        0,
1560 /*65*/ 0,
1561        0,
1562        0,
1563        0,
1564        0,
1565 /*70*/ MatGetRowMax_MPISBAIJ,
1566        0,
1567        0,
1568        0,
1569        0,
1570 /*75*/ 0,
1571        0,
1572        0,
1573        0,
1574        0,
1575 /*80*/ 0,
1576        0,
1577        0,
1578        0,
1579        MatLoad_MPISBAIJ,
1580 /*85*/ 0,
1581        0,
1582        0,
1583        0,
1584        0,
1585 /*90*/ 0,
1586        0,
1587        0,
1588        0,
1589        0,
1590 /*95*/ 0,
1591        0,
1592        0,
1593        0};
1594 
1595 
1596 EXTERN_C_BEGIN
1597 #undef __FUNCT__
1598 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1599 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1600 {
1601   PetscFunctionBegin;
1602   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1603   *iscopy = PETSC_FALSE;
1604   PetscFunctionReturn(0);
1605 }
1606 EXTERN_C_END
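
/*
   Usage sketch (a minimal example; the diagonal block is returned without
   copying, is owned by A, and must not be destroyed by the caller):

     Mat        Ad;
     PetscTruth iscopy;
     ierr = MatGetDiagonalBlock(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
*/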
1607 
1608 EXTERN_C_BEGIN
1609 #undef __FUNCT__
1610 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1611 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1612 {
1613   Mat_MPISBAIJ   *b;
1614   PetscErrorCode ierr;
1615   PetscInt       i,mbs,Mbs;
1616 
1617   PetscFunctionBegin;
1618   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1619 
1620   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1621   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1622   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1623   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1624   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1625   if (d_nnz) {
1626     for (i=0; i<B->m/bs; i++) {
1627       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1628     }
1629   }
1630   if (o_nnz) {
1631     for (i=0; i<B->m/bs; i++) {
1632       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1633     }
1634   }
1635   B->preallocated = PETSC_TRUE;
1636   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1637   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1638   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1639   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1640 
1641   b   = (Mat_MPISBAIJ*)B->data;
1642   mbs = B->m/bs;
1643   Mbs = B->M/bs;
1644   if (mbs*bs != B->m) {
1645     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->m,bs);
1646   }
1647 
1648   B->bs  = bs;
1649   b->bs2 = bs*bs;
1650   b->mbs = mbs;
1651   b->nbs = mbs;
1652   b->Mbs = Mbs;
1653   b->Nbs = Mbs;
1654 
1655   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1656   b->rowners[0]    = 0;
1657   for (i=2; i<=b->size; i++) {
1658     b->rowners[i] += b->rowners[i-1];
1659   }
1660   b->rstart    = b->rowners[b->rank];
1661   b->rend      = b->rowners[b->rank+1];
1662   b->cstart    = b->rstart;
1663   b->cend      = b->rend;
1664   for (i=0; i<=b->size; i++) {
1665     b->rowners_bs[i] = b->rowners[i]*bs;
1666   }
1667   b->rstart_bs = b->rstart*bs;
1668   b->rend_bs   = b->rend*bs;
1669 
1670   b->cstart_bs = b->cstart*bs;
1671   b->cend_bs   = b->cend*bs;
1672 
1673   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1674   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1675   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1676   PetscLogObjectParent(B,b->A);
1677 
1678   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1679   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1680   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1681   PetscLogObjectParent(B,b->B);
1682 
1683   /* build cache for off-processor entries formed during assembly */
1684   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1685 
1686   PetscFunctionReturn(0);
1687 }
1688 EXTERN_C_END
1689 
1690 /*MC
1691    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1692    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1693 
1694    Options Database Keys:
1695 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1696 
1697   Level: beginner
1698 
1699 .seealso: MatCreateMPISBAIJ()
1700 M*/
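
/*
   Selection sketch: either run with -mat_type mpisbaij and call
   MatSetFromOptions(), or set the type explicitly (M and bs below are
   illustrative, not prescriptive):

     ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,M,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
*/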
1701 
1702 EXTERN_C_BEGIN
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatCreate_MPISBAIJ"
1705 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1706 {
1707   Mat_MPISBAIJ   *b;
1708   PetscErrorCode ierr;
1709   PetscTruth     flg;
1710 
1711   PetscFunctionBegin;
1712 
1713   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1714   B->data = (void*)b;
1715   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1716   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1717 
1718   B->ops->destroy    = MatDestroy_MPISBAIJ;
1719   B->ops->view       = MatView_MPISBAIJ;
1720   B->mapping    = 0;
1721   B->factor     = 0;
1722   B->assembled  = PETSC_FALSE;
1723 
1724   B->insertmode = NOT_SET_VALUES;
1725   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1726   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1727 
1728   /* build local table of row and column ownerships */
1729   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1730   b->cowners    = b->rowners + b->size + 2;
1731   b->rowners_bs = b->cowners + b->size + 2;
1732   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1733 
1734   /* build cache for off-processor entries formed during assembly */
1735   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1736   b->donotstash  = PETSC_FALSE;
1737   b->colmap      = PETSC_NULL;
1738   b->garray      = PETSC_NULL;
1739   b->roworiented = PETSC_TRUE;
1740 
1741 #if defined(PETSC_USE_MAT_SINGLE)
1742   /* stuff for MatSetValues_XXX in single precision */
1743   b->setvalueslen     = 0;
1744   b->setvaluescopy    = PETSC_NULL;
1745 #endif
1746 
1747   /* stuff used in block assembly */
1748   b->barray       = 0;
1749 
1750   /* stuff used for matrix vector multiply */
1751   b->lvec         = 0;
1752   b->Mvctx        = 0;
1753   b->slvec0       = 0;
1754   b->slvec0b      = 0;
1755   b->slvec1       = 0;
1756   b->slvec1a      = 0;
1757   b->slvec1b      = 0;
1758   b->sMvctx       = 0;
1759 
1760   /* stuff for MatGetRow() */
1761   b->rowindices   = 0;
1762   b->rowvalues    = 0;
1763   b->getrowactive = PETSC_FALSE;
1764 
1765   /* hash table stuff */
1766   b->ht           = 0;
1767   b->hd           = 0;
1768   b->ht_size      = 0;
1769   b->ht_flag      = PETSC_FALSE;
1770   b->ht_fact      = 0;
1771   b->ht_total_ct  = 0;
1772   b->ht_insert_ct = 0;
1773 
1774   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1775   if (flg) {
1776     PetscReal fact = 1.39;
1777     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1778     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1779     if (fact <= 1.0) fact = 1.39;
1780     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1781     PetscLogInfo(0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact);
1782   }
1783   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1784                                      "MatStoreValues_MPISBAIJ",
1785                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1786   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1787                                      "MatRetrieveValues_MPISBAIJ",
1788                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1789   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1790                                      "MatGetDiagonalBlock_MPISBAIJ",
1791                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1792   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1793                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1794                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1795   B->symmetric                  = PETSC_TRUE;
1796   B->structurally_symmetric     = PETSC_TRUE;
1797   B->symmetric_set              = PETSC_TRUE;
1798   B->structurally_symmetric_set = PETSC_TRUE;
1799   PetscFunctionReturn(0);
1800 }
1801 EXTERN_C_END
1802 
1803 /*MC
1804    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1805 
1806    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1807    and MATMPISBAIJ otherwise.
1808 
1809    Options Database Keys:
1810 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1811 
1812   Level: beginner
1813 
1814 .seealso: MatCreateMPISBAIJ(),MATSEQSBAIJ,MATMPISBAIJ
1815 M*/
1816 
1817 EXTERN_C_BEGIN
1818 #undef __FUNCT__
1819 #define __FUNCT__ "MatCreate_SBAIJ"
1820 PetscErrorCode MatCreate_SBAIJ(Mat A)
1821 {
1822   PetscErrorCode ierr;
1823   PetscMPIInt    size;
1824 
1825   PetscFunctionBegin;
1826   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1827   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1828   if (size == 1) {
1829     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1830   } else {
1831     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1832   }
1833   PetscFunctionReturn(0);
1834 }
1835 EXTERN_C_END
1836 
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1839 /*@C
1840    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1841    the user should preallocate the matrix storage by setting the parameters
1842    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1843    performance can be increased by more than a factor of 50.
1844 
1845    Collective on Mat
1846 
1847    Input Parameters:
1848 +  A - the matrix
1849 .  bs   - size of block
1850 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1851            submatrix  (same for all local rows)
1852 .  d_nnz - array containing the number of block nonzeros in the various block rows
1853            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1854            (possibly different for each block row) or PETSC_NULL.  You must leave room
1855            for the diagonal entry even if it is zero.
1856 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1857            submatrix (same for all local rows).
1858 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1859            off-diagonal portion of the local submatrix (possibly different for
1860            each block row) or PETSC_NULL.
1861 
1862 
1863    Options Database Keys:
1864 +   -mat_no_unroll - uses code that does not unroll the loops in the
1865                      block calculations (much slower)
1866 -   -mat_block_size - size of the blocks to use
1867 
1868    Notes:
1869 
1870    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1871    then it must be used on all processors that share the object for that argument.
1872 
1873    Storage Information:
1874    For a square global matrix we define each processor's diagonal portion
1875    to be its local rows and the corresponding columns (a square submatrix);
1876    each processor's off-diagonal portion encompasses the remainder of the
1877    local matrix (a rectangular submatrix).
1878 
1879    The user can specify preallocated storage for the diagonal part of
1880    the local submatrix with either d_nz or d_nnz (not both).  Set
1881    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1882    memory allocation.  Likewise, specify preallocated storage for the
1883    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1884 
1885    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1886    the figure below we depict these three local rows and all columns (0-11).
1887 
1888 .vb
1889            0 1 2 3 4 5 6 7 8 9 10 11
1890           -------------------
1891    row 3  |  o o o d d d o o o o o o
1892    row 4  |  o o o d d d o o o o o o
1893    row 5  |  o o o d d d o o o o o o
1894           -------------------
1895 .ve
1896 
1897    Thus, any entries in the d locations are stored in the d (diagonal)
1898    submatrix, and any entries in the o locations are stored in the
1899    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1900    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1901 
1902    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1903    plus the diagonal part of the d matrix,
1904    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1905    In general, for PDE problems in which most nonzeros are near the diagonal,
1906    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1907    or you will get TERRIBLE performance; see the users' manual chapter on
1908    matrices.
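
   Example (a minimal sketch; the block size and nonzero counts below are
   illustrative, not prescriptive):
.vb
   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,M,&B);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(B,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve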
1909 
1910    Level: intermediate
1911 
1912 .keywords: matrix, block, aij, compressed row, sparse, parallel
1913 
1914 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1915 @*/
1916 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1917 {
1918   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1919 
1920   PetscFunctionBegin;
1921   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1922   if (f) {
1923     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1924   }
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 #undef __FUNCT__
1929 #define __FUNCT__ "MatCreateMPISBAIJ"
1930 /*@C
1931    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1932    (block compressed row).  For good matrix assembly performance
1933    the user should preallocate the matrix storage by setting the parameters
1934    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1935    performance can be increased by more than a factor of 50.
1936 
1937    Collective on MPI_Comm
1938 
1939    Input Parameters:
1940 +  comm - MPI communicator
1941 .  bs   - size of block
1942 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1943            This value should be the same as the local size used in creating the
1944            y vector for the matrix-vector product y = Ax.
1945 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1946            This value should be the same as the local size used in creating the
1947            x vector for the matrix-vector product y = Ax.
1948 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1949 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1950 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1951            submatrix  (same for all local rows)
1952 .  d_nnz - array containing the number of block nonzeros in the various block rows
1953            in the upper triangular portion of the diagonal portion of the local submatrix
1954            (possibly different for each block row) or PETSC_NULL.
1955            You must leave room for the diagonal entry even if it is zero.
1956 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1957            submatrix (same for all local rows).
1958 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1959            off-diagonal portion of the local submatrix (possibly different for
1960            each block row) or PETSC_NULL.
1961 
1962    Output Parameter:
1963 .  A - the matrix
1964 
1965    Options Database Keys:
1966 +   -mat_no_unroll - uses code that does not unroll the loops in the
1967                      block calculations (much slower)
1968 .   -mat_block_size - size of the blocks to use
1969 -   -mat_mpi - use the parallel matrix data structures even on one processor
1970                (defaults to using SeqBAIJ format on one processor)
1971 
1972    Notes:
1973    The user MUST specify either the local or global matrix dimensions
1974    (possibly both).
1975 
1976    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1977    then it must be used on all processors that share the object for that argument.
1978 
1979    Storage Information:
1980    For a square global matrix we define each processor's diagonal portion
1981    to be its local rows and the corresponding columns (a square submatrix);
1982    each processor's off-diagonal portion encompasses the remainder of the
1983    local matrix (a rectangular submatrix).
1984 
1985    The user can specify preallocated storage for the diagonal part of
1986    the local submatrix with either d_nz or d_nnz (not both).  Set
1987    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1988    memory allocation.  Likewise, specify preallocated storage for the
1989    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1990 
1991    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1992    the figure below we depict these three local rows and all columns (0-11).
1993 
1994 .vb
1995            0 1 2 3 4 5 6 7 8 9 10 11
1996           -------------------
1997    row 3  |  o o o d d d o o o o o o
1998    row 4  |  o o o d d d o o o o o o
1999    row 5  |  o o o d d d o o o o o o
2000           -------------------
2001 .ve
2002 
2003    Thus, any entries in the d locations are stored in the d (diagonal)
2004    submatrix, and any entries in the o locations are stored in the
2005    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2006    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2007 
2008    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2009    plus the diagonal part of the d matrix,
2010    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2011    In general, for PDE problems in which most nonzeros are near the diagonal,
2012    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2013    or you will get TERRIBLE performance; see the users' manual chapter on
2014    matrices.
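
   Example (a sketch with illustrative arguments; PETSc decides the local sizes):
.vb
   ierr = MatCreateMPISBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,M,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve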
2015 
2016    Level: intermediate
2017 
2018 .keywords: matrix, block, aij, compressed row, sparse, parallel
2019 
2020 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2021 @*/
2022 
2023 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2024 {
2025   PetscErrorCode ierr;
2026   PetscMPIInt    size;
2027 
2028   PetscFunctionBegin;
2029   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2030   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2031   if (size > 1) {
2032     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2033     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2034   } else {
2035     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2036     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2037   }
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2044 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2045 {
2046   Mat            mat;
2047   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2048   PetscErrorCode ierr;
2049   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
2050   PetscScalar    *array;
2051 
2052   PetscFunctionBegin;
2053   *newmat       = 0;
2054   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2055   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2056   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2057 
2058   mat->factor       = matin->factor;
2059   mat->preallocated = PETSC_TRUE;
2060   mat->assembled    = PETSC_TRUE;
2061   mat->insertmode   = NOT_SET_VALUES;
2062 
2063   a = (Mat_MPISBAIJ*)mat->data;
2064   mat->bs  = matin->bs;
2065   a->bs2   = oldmat->bs2;
2066   a->mbs   = oldmat->mbs;
2067   a->nbs   = oldmat->nbs;
2068   a->Mbs   = oldmat->Mbs;
2069   a->Nbs   = oldmat->Nbs;
2070 
2071   a->rstart       = oldmat->rstart;
2072   a->rend         = oldmat->rend;
2073   a->cstart       = oldmat->cstart;
2074   a->cend         = oldmat->cend;
2075   a->size         = oldmat->size;
2076   a->rank         = oldmat->rank;
2077   a->donotstash   = oldmat->donotstash;
2078   a->roworiented  = oldmat->roworiented;
2079   a->rowindices   = 0;
2080   a->rowvalues    = 0;
2081   a->getrowactive = PETSC_FALSE;
2082   a->barray       = 0;
2083   a->rstart_bs    = oldmat->rstart_bs;
2084   a->rend_bs      = oldmat->rend_bs;
2085   a->cstart_bs    = oldmat->cstart_bs;
2086   a->cend_bs      = oldmat->cend_bs;
2087 
2088   /* hash table stuff */
2089   a->ht           = 0;
2090   a->hd           = 0;
2091   a->ht_size      = 0;
2092   a->ht_flag      = oldmat->ht_flag;
2093   a->ht_fact      = oldmat->ht_fact;
2094   a->ht_total_ct  = 0;
2095   a->ht_insert_ct = 0;
2096 
2097   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2098   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2099   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2100   if (oldmat->colmap) {
2101 #if defined (PETSC_USE_CTABLE)
2102     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2103 #else
2104     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2105     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
2106     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2107 #endif
2108   } else a->colmap = 0;
2109 
2110   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2111     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2112     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2113     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2114   } else a->garray = 0;
2115 
2116   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2117   PetscLogObjectParent(mat,a->lvec);
2118   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2119   PetscLogObjectParent(mat,a->Mvctx);
2120 
2121   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2122   PetscLogObjectParent(mat,a->slvec0);
2123   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2124   PetscLogObjectParent(mat,a->slvec1);
2125 
2126   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2127   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2128   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2129   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2130   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2131   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2132   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2133   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2134   PetscLogObjectParent(mat,a->slvec0);
2135   PetscLogObjectParent(mat,a->slvec1);
2136   PetscLogObjectParent(mat,a->slvec0b);
2137   PetscLogObjectParent(mat,a->slvec1a);
2138   PetscLogObjectParent(mat,a->slvec1b);
2139 
2140   /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not yet written; instead share the old scatter context by taking a reference: */
2141   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2142   a->sMvctx = oldmat->sMvctx;
2143   PetscLogObjectParent(mat,a->sMvctx);
2144 
2145   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2146   PetscLogObjectParent(mat,a->A);
2147   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2148   PetscLogObjectParent(mat,a->B);
2149   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2150   *newmat = mat;
2151   PetscFunctionReturn(0);
2152 }
2153 
2154 #include "petscsys.h"
2155 
2156 #undef __FUNCT__
2157 #define __FUNCT__ "MatLoad_MPISBAIJ"
2158 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2159 {
2160   Mat            A;
2161   PetscErrorCode ierr;
2162   PetscInt       i,nz,j,rstart,rend,fd;
2163   PetscScalar    *vals,*buf;
2164   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2165   MPI_Status     status;
2166   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2167   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2168   PetscInt       *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2169   PetscInt       bs=1,Mbs,mbs,extra_rows;
2170   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2171   PetscInt       dcount,kmax,k,nzcount,tmp;
2172 
2173   PetscFunctionBegin;
2174   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2175 
2176   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2178   if (!rank) {
2179     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2180     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2181     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2182     if (header[3] < 0) {
2183       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2184     }
2185   }
2186 
2187   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2188   M = header[1]; N = header[2];
2189 
2190   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2191 
2192   /*
2193      This code adds extra rows to make sure the number of rows is
2194      divisible by the blocksize
2195   */
2196   Mbs        = M/bs;
2197   extra_rows = bs - M + bs*(Mbs);
2198   if (extra_rows == bs) extra_rows = 0;
2199   else                  Mbs++;
2200   if (extra_rows && !rank) {
2201     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2202   }
2203 
2204   /* determine ownership of all rows */
2205   mbs        = Mbs/size + ((Mbs % size) > rank);
2206   m          = mbs*bs;
2207   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2208   browners   = rowners + size + 1;
2209   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2210   rowners[0] = 0;
2211   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2212   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2213   rstart = rowners[rank];
2214   rend   = rowners[rank+1];
2215 
2216   /* distribute row lengths to all processors */
2217   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2218   if (!rank) {
2219     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2220     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2221     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2222     ierr = PetscMalloc(size*sizeof(PetscInt),&sndcounts);CHKERRQ(ierr);
2223     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2224     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2225     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2226   } else {
2227     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2228   }
2229 
2230   if (!rank) {   /* procs[0] */
2231     /* calculate the number of nonzeros on each processor */
2232     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2233     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2234     for (i=0; i<size; i++) {
2235       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2236         procsnz[i] += rowlengths[j];
2237       }
2238     }
2239     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2240 
2241     /* determine max buffer needed and allocate it */
2242     maxnz = 0;
2243     for (i=0; i<size; i++) {
2244       maxnz = PetscMax(maxnz,procsnz[i]);
2245     }
2246     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2247 
2248     /* read in my part of the matrix column indices  */
2249     nz     = procsnz[0];
2250     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2251     mycols = ibuf;
2252     if (size == 1)  nz -= extra_rows;
2253     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2254     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2255 
2256     /* read in the parts of all the other processors (except the last) and ship them off */
2257     for (i=1; i<size-1; i++) {
2258       nz   = procsnz[i];
2259       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2260       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2261     }
2262     /* read in the stuff for the last proc */
2263     if (size != 1) {
2264       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2265       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2266       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2267       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2268     }
2269     ierr = PetscFree(cols);CHKERRQ(ierr);
2270   } else {  /* procs[i], i>0 */
2271     /* determine buffer space needed for message */
2272     nz = 0;
2273     for (i=0; i<m; i++) {
2274       nz += locrowlens[i];
2275     }
2276     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2277     mycols = ibuf;
2278     /* receive message of column indices*/
2279     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2280     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2281     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2282   }
2283 
2284   /* loop over local rows, determining number of off diagonal entries */
2285   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2286   odlens   = dlens + (rend-rstart);
2287   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2288   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2289   masked1  = mask    + Mbs;
2290   masked2  = masked1 + Mbs;
2291   rowcount = 0; nzcount = 0;
2292   for (i=0; i<mbs; i++) {
2293     dcount  = 0;
2294     odcount = 0;
2295     for (j=0; j<bs; j++) {
2296       kmax = locrowlens[rowcount];
2297       for (k=0; k<kmax; k++) {
2298         tmp = mycols[nzcount++]/bs; /* block col. index */
2299         if (!mask[tmp]) {
2300           mask[tmp] = 1;
2301           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2302           else masked1[dcount++] = tmp; /* entry in diag portion */
2303         }
2304       }
2305       rowcount++;
2306     }
2307 
2308     dlens[i]  = dcount;  /* d_nzz[i] */
2309     odlens[i] = odcount; /* o_nzz[i] */
2310 
2311     /* zero out the mask elements we set */
2312     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2313     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2314   }
2315 
2316   /* create our matrix */
2317   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2318   ierr = MatSetType(A,type);CHKERRQ(ierr);
2319   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2320   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2321 
2322   if (!rank) {
2323     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2324     /* read in my part of the matrix numerical values  */
2325     nz = procsnz[0];
2326     vals = buf;
2327     mycols = ibuf;
2328     if (size == 1)  nz -= extra_rows;
2329     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2330     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2331 
2332     /* insert into matrix */
2333     jj      = rstart*bs;
2334     for (i=0; i<m; i++) {
2335       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2336       mycols += locrowlens[i];
2337       vals   += locrowlens[i];
2338       jj++;
2339     }
2340 
2341     /* read in other processors (except the last one) and ship out */
2342     for (i=1; i<size-1; i++) {
2343       nz   = procsnz[i];
2344       vals = buf;
2345       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2346       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2347     }
2348     /* the last proc */
2349     if (size != 1){
2350       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2351       vals = buf;
2352       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2353       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2354       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2355     }
2356     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2357 
2358   } else {
2359     /* receive numeric values */
2360     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2361 
2362     /* receive message of values*/
2363     vals   = buf;
2364     mycols = ibuf;
2365     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2366     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2367     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2368 
2369     /* insert into matrix */
2370     jj      = rstart*bs;
2371     for (i=0; i<m; i++) {
2372       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2373       mycols += locrowlens[i];
2374       vals   += locrowlens[i];
2375       jj++;
2376     }
2377   }
2378 
2379   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2380   ierr = PetscFree(buf);CHKERRQ(ierr);
2381   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2382   ierr = PetscFree(rowners);CHKERRQ(ierr);
2383   ierr = PetscFree(dlens);CHKERRQ(ierr);
2384   ierr = PetscFree(mask);CHKERRQ(ierr);
2385   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2386   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2387   *newmat = A;
2388   PetscFunctionReturn(0);
2389 }
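
/*
   Loading sketch (assumes "matrix.dat" was written by MatView() on a binary
   viewer; the read-mode constant name varies across PETSc versions, here
   PETSC_FILE_RDONLY; the block size may be supplied with -matload_block_size):

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",PETSC_FILE_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPISBAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/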
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2393 /*@
2394    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2395 
2396    Input Parameters:
2397 +  mat  - the matrix
2398 -  fact - factor
2399 
2400    Collective on Mat
2401 
2402    Level: advanced
2403 
2404   Notes:
2405    This can also be set by the command line option: -mat_use_hash_table fact
2406 
2407 .keywords: matrix, hashtable, factor, HT
2408 
2409 .seealso: MatSetOption()
2410 @*/
2411 PetscErrorCode MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2412 {
2413   PetscFunctionBegin;
2414   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
2415   /* PetscFunctionReturn(0); */
2416 }
2417 
2418 #undef __FUNCT__
2419 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2420 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2421 {
2422   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2423   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2424   PetscReal      atmp;
2425   PetscReal      *work,*svalues,*rvalues;
2426   PetscErrorCode ierr;
2427   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2428   PetscMPIInt    rank,size;
2429   PetscInt       *rowners_bs,dest,count,source;
2430   PetscScalar    *va;
2431   MatScalar      *ba;
2432   MPI_Status     stat;
2433 
2434   PetscFunctionBegin;
2435   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2436   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2437 
2438   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2439   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2440 
2441   bs   = A->bs;
2442   mbs  = a->mbs;
2443   Mbs  = a->Mbs;
2444   ba   = b->a;
2445   bi   = b->i;
2446   bj   = b->j;
2447 
2448   /* find ownerships */
2449   rowners_bs = a->rowners_bs;
2450 
2451   /* each proc creates an array to be distributed */
2452   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2453   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2454 
2455   /* row_max for B */
2456   if (rank != size-1){
2457     for (i=0; i<mbs; i++) {
2458       ncols = bi[1] - bi[0]; bi++;
2459       brow  = bs*i;
2460       for (j=0; j<ncols; j++){
2461         bcol = bs*(*bj);
2462         for (kcol=0; kcol<bs; kcol++){
2463           col = bcol + kcol;                 /* local col index */
2464           col += rowners_bs[rank+1];      /* global col index */
2465           for (krow=0; krow<bs; krow++){
2466             atmp = PetscAbsScalar(*ba); ba++;
2467             row = brow + krow;    /* local row index */
2468             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2469             if (work[col] < atmp) work[col] = atmp;
2470           }
2471         }
2472         bj++;
2473       }
2474     }
2475 
2476     /* send values to its owners */
2477     for (dest=rank+1; dest<size; dest++){
2478       svalues = work + rowners_bs[dest];
2479       count   = rowners_bs[dest+1]-rowners_bs[dest];
2480       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2481     }
2482   }
2483 
2484   /* receive values */
2485   if (rank){
2486     rvalues = work;
2487     count   = rowners_bs[rank+1]-rowners_bs[rank];
2488     for (source=0; source<rank; source++){
2489       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2490       /* process values */
2491       for (i=0; i<count; i++){
2492         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2493       }
2494     }
2495   }
2496 
2497   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2498   ierr = PetscFree(work);CHKERRQ(ierr);
2499   PetscFunctionReturn(0);
2500 }
2501 
2502 #undef __FUNCT__
2503 #define __FUNCT__ "MatRelax_MPISBAIJ"
2504 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2505 {
2506   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2507   PetscErrorCode ierr;
2508   PetscInt       mbs=mat->mbs,bs=matin->bs;
2509   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2510   Vec            bb1;
2511 
2512   PetscFunctionBegin;
2513   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2514   if (bs > 1)
2515     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2516 
2517   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2518     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2519       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2520       its--;
2521     }
2522 
2523     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2524     while (its--){
2525 
2526       /* lower triangular part: slvec0b = - B^T*xx */
2527       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2528 
2529       /* copy xx into slvec0a */
2530       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2531       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2532       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2533       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2534 
2535       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2536 
2537       /* copy bb into slvec1a */
2538       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2539       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2540       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2541       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2542 
2543       /* set slvec1b = 0 */
2544       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2545 
2546       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2547       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2548       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2549       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2550 
2551       /* upper triangular part: bb1 = bb1 - B*x */
2552       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2553 
2554       /* local diagonal sweep */
2555       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2556     }
2557     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2558   } else {
2559     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2560   }
2561   PetscFunctionReturn(0);
2562 }
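
/*
   Invocation sketch (one local SSOR sweep with zero initial guess; omega and
   the iteration counts are illustrative):

     ierr = MatRelax(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS,0.0,1,1,x);CHKERRQ(ierr);
*/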
2563 
2564 #undef __FUNCT__
2565 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2566 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2567 {
2568   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2569   PetscErrorCode ierr;
2570   PetscScalar    mone=-1.0;
2571   Vec            lvec1,bb1;
2572 
2573   PetscFunctionBegin;
2574   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2575   if (matin->bs > 1)
2576     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2577 
2578   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2579     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2580       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2581       its--;
2582     }
2583 
2584     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2585     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2586     while (its--){
2587       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2588 
2589       /* lower diagonal part: bb1 = bb - B^T*xx */
2590       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2591       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2592 
2593       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2594       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2595       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2596 
2597       /* upper diagonal part: bb1 = bb1 - B*x */
2598       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2599       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2600 
2601       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2602 
2603       /* diagonal sweep */
2604       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2605     }
2606     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2607     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2608   } else {
2609     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2610   }
2611   PetscFunctionReturn(0);
2612 }
2613 
2614