/* xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 046da0a13659bf6557c8549a2fea89880b2ef309) */

#include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist; the #defines below map the ..._MatScalar() names directly onto the regular
   functions. Otherwise ..._MatScalar() takes matrix elements in single precision (MatScalar) and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
#endif
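/*
   Illustration of the dispatch above (a reading aid, not from the original
   source): with PETSC_USE_MAT_SINGLE the call chain is

      MatSetValues_MPISBAIJ()                 - copies the PetscScalar values
                                                into a cached MatScalar buffer
        -> MatSetValues_MPISBAIJ_MatScalar()  - inserts the MatScalar entries

   whereas without it the #defines above collapse the two names into a single
   function, so no conversion copy is made.
*/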

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END


#define CHUNKSIZE  10

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (PetscInt*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
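/*
   Layout assumed by the macro above (a reading aid, using the macro's own
   names): block row brow owns the nonzero blocks aj[ai[brow]] .. aj[ai[brow+1]-1],
   each stored as a dense bs x bs column-major block inside aa.  Scalar entry
   (row,col) = (bs*brow+ridx, bs*bcol+cidx) of block number _i therefore lives at

      aa[bs2*_i + bs*cidx + ridx]

   For example, with bs = 2 (bs2 = 4) a block {{p,q},{r,s}} is stored as
   p,r,q,s, and the entry (ridx,cidx) = (0,1), i.e. q, sits at offset
   bs*1 + 0 = 2 within the block.
*/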
#ifndef MatSetValues_SeqSBAIJ_B_Private
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (PetscInt*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
PetscErrorCode MatSetValues_MPISBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
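/* Caller-side sketch (illustrative only, not used by this file): since only
   the upper triangle is stored, an assembly loop may simply start each row at
   the diagonal; any lower-triangular entries passed in would be dropped. */
#if 0
static PetscErrorCode ExampleAssembleUpperTriangle(Mat mat,PetscInt rstart,PetscInt rend,PetscInt N)
{
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscScalar    v = 1.0;
  for (i=rstart; i<rend; i++) {
    for (j=i; j<N; j++) {                  /* j >= i: upper triangle only */
      ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
    }
  }
  return 0;
}
#endif
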
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some variables required in the macros */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  /* for stash */
  PetscInt       n_loc, *in_loc=0;
  MatScalar      *v_loc=0;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }

  if (!baij->donotstash) {
    ierr = PetscFree(in_loc);CHKERRQ(ierr);
    ierr = PetscFree(v_loc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
  PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
  PetscInt        cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
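/* A worked instance of the multiplicative hash above (a reading aid, not from
   the original source): HASH keeps the fractional part of key*HASH_KEY (the
   golden-ratio constant) and scales it by the table size.  For size = 97 and
   key = 40:

      40*0.6180339887 = 24.7213...,  fractional part 0.7213...,
      (PetscInt)(97*0.7213...) = 69

   so the key hashes to slot 69. */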
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=baij->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;             /* square of norm(A) */
      ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);  /* B is stored once but appears twice by symmetry */
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
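
/*
   Why the factor of 2 in MatNorm_MPISBAIJ above (a sketch of the reasoning,
   not from the original source): with symmetric block storage each process
   keeps its diagonal block A and only the upper off-diagonal block B, whose
   mirror image B^T is never stored.  Hence, per process,

      ||M_local||_F^2 = ||A||_F^2 + ||B||_F^2 + ||B^T||_F^2
                      = ||A||_F^2 + 2*||B||_F^2

   and the MPI_Allreduce sums these contributions over all processes before
   the final square root is taken.
*/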

/*
  Creates and populates the hash table.
  The table is created only once; if new entries need to be added to the
  matrix then the hash table has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,n,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. The values are stashed column-oriented, so
       temporarily switch to column-oriented insertion and restore the original
       flags after the values have been set */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = baij->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              baij->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=a->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

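  /*
     Scheme (as implemented below): only the diagonal block A and the upper
     off-diagonal block B are stored locally, so

        y = A*x + B*x_ghost + B^T*x_local .

     slvec0 is packed as [x_local ; B^T*x_local], and a single ADD_VALUES
     forward scatter on sMvctx both delivers the B^T contributions to their
     owning processes (into slvec1a) and gathers the ghost values of x
     (into slvec1b); one final multadd with B then completes the product.
  */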
  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=a->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPISBAIJ"
PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the matrix is symmetric, so A^T x = A x */
  ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the matrix is symmetric, so A^T x + y = A x + y */
  ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = mat->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  PetscInt       *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
PetscErrorCode MatGetBlockSize_MPISBAIJ(Mat mat,PetscInt *bs)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = (PetscReal)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  /* use matin, not the local diagonal block A, for the global sizes */
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->n;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
1334     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1335     break;
1336   case MAT_COLUMN_ORIENTED:
1337     a->roworiented = PETSC_FALSE;
1338     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1339     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1340     break;
1341   case MAT_IGNORE_OFF_PROC_ENTRIES:
1342     a->donotstash = PETSC_TRUE;
1343     break;
1344   case MAT_NO_NEW_DIAGONALS:
1345     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1346   case MAT_USE_HASH_TABLE:
1347     a->ht_flag = PETSC_TRUE;
1348     break;
1349   case MAT_NOT_SYMMETRIC:
1350   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1351   case MAT_HERMITIAN:
1352     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1353   case MAT_SYMMETRIC:
1354   case MAT_STRUCTURALLY_SYMMETRIC:
1355   case MAT_NOT_HERMITIAN:
1356   case MAT_SYMMETRY_ETERNAL:
1357   case MAT_NOT_SYMMETRY_ETERNAL:
1358     break;
1359   default:
1360     SETERRQ(PETSC_ERR_SUP,"unknown option");
1361   }
1362   PetscFunctionReturn(0);
1363 }
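/*
   Illustrative usage sketch (editorial addition): options are set before assembly;
   for instance, to drop off-process entries in the stash and enable the hash table
   used during assembly, one might write

.vb
      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);
      ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
.ve

   Note from the switch above that options implying asymmetry, such as
   MAT_NOT_SYMMETRIC, are rejected since this format is symmetric by construction.
*/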
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1367 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1368 {
1369   PetscErrorCode ierr;
1370   PetscFunctionBegin;
1371   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1377 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1378 {
1379   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1380   Mat            a = baij->A,b = baij->B;
1381   PetscErrorCode ierr;
1382   PetscInt       s1,s2,s3;
1383 
1384   PetscFunctionBegin;
1385   if (ll != rr) {
1386     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1387   }
1388   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1389   if (rr) {
1390     /* ll == rr is enforced above, so ll is also nonzero inside this block */
1391     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1392     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1393     /* overlap communication with computation: start scattering rr into lvec */
1394     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1395     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1396     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1397     /* left scale the off-diagonal block while the scatter is in flight */
1398     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1399 
1400     /* scale the diagonal block */
1401     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1402 
1403     /* finish the scatter, then right scale the off-diagonal block */
1404     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1405     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1406   }
1407 
1408   PetscFunctionReturn(0);
1409 }
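/*
   Illustrative usage sketch (editorial addition): since only ll == rr is accepted,
   a symmetric scaling A <- D*A*D is requested by passing the same vector twice:

.vb
      ierr = MatDiagonalScale(A,d,d);CHKERRQ(ierr);
.ve
*/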
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1413 PetscErrorCode MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1414 {
1415   PetscFunctionBegin;
1416   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1417 }
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1421 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1422 {
1423   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1424   MPI_Comm          comm = A->comm;
1425   static PetscTruth called = PETSC_FALSE;
1426   PetscErrorCode    ierr;
1427 
1428   PetscFunctionBegin;
1429   if (!a->rank) {
1430     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1431   }
1432   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1433   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1434   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1440 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1441 {
1442   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1443   PetscErrorCode ierr;
1444 
1445   PetscFunctionBegin;
1446   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1447   PetscFunctionReturn(0);
1448 }
1449 
1450 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1451 
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatEqual_MPISBAIJ"
1454 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1455 {
1456   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1457   Mat            a,b,c,d;
1458   PetscTruth     flg;
1459   PetscErrorCode ierr;
1460 
1461   PetscFunctionBegin;
1462   a = matA->A; b = matA->B;
1463   c = matB->A; d = matB->B;
1464 
1465   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1466   if (flg == PETSC_TRUE) {
1467     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1468   }
1469   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1475 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1476 {
1477   PetscErrorCode ierr;
1478 
1479   PetscFunctionBegin;
1480   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1481   PetscFunctionReturn(0);
1482 }
1483 
1484 #undef __FUNCT__
1485 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1486 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1487 {
1488   PetscErrorCode ierr;
1489   PetscInt       i;
1490   PetscTruth     flg;
1491 
1492   PetscFunctionBegin;
1493   for (i=0; i<n; i++) {
1494     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1495     if (!flg) {
1496       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1497     }
1498   }
1499   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 
1504 /* -------------------------------------------------------------------*/
1505 static struct _MatOps MatOps_Values = {
1506        MatSetValues_MPISBAIJ,
1507        MatGetRow_MPISBAIJ,
1508        MatRestoreRow_MPISBAIJ,
1509        MatMult_MPISBAIJ,
1510 /* 4*/ MatMultAdd_MPISBAIJ,
1511        MatMultTranspose_MPISBAIJ,
1512        MatMultTransposeAdd_MPISBAIJ,
1513        0,
1514        0,
1515        0,
1516 /*10*/ 0,
1517        0,
1518        0,
1519        MatRelax_MPISBAIJ,
1520        MatTranspose_MPISBAIJ,
1521 /*15*/ MatGetInfo_MPISBAIJ,
1522        MatEqual_MPISBAIJ,
1523        MatGetDiagonal_MPISBAIJ,
1524        MatDiagonalScale_MPISBAIJ,
1525        MatNorm_MPISBAIJ,
1526 /*20*/ MatAssemblyBegin_MPISBAIJ,
1527        MatAssemblyEnd_MPISBAIJ,
1528        0,
1529        MatSetOption_MPISBAIJ,
1530        MatZeroEntries_MPISBAIJ,
1531 /*25*/ MatZeroRows_MPISBAIJ,
1532        0,
1533        0,
1534        0,
1535        0,
1536 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1537        0,
1538        0,
1539        0,
1540        0,
1541 /*35*/ MatDuplicate_MPISBAIJ,
1542        0,
1543        0,
1544        0,
1545        0,
1546 /*40*/ 0,
1547        MatGetSubMatrices_MPISBAIJ,
1548        MatIncreaseOverlap_MPISBAIJ,
1549        MatGetValues_MPISBAIJ,
1550        0,
1551 /*45*/ MatPrintHelp_MPISBAIJ,
1552        MatScale_MPISBAIJ,
1553        0,
1554        0,
1555        0,
1556 /*50*/ MatGetBlockSize_MPISBAIJ,
1557        0,
1558        0,
1559        0,
1560        0,
1561 /*55*/ 0,
1562        0,
1563        MatSetUnfactored_MPISBAIJ,
1564        0,
1565        MatSetValuesBlocked_MPISBAIJ,
1566 /*60*/ 0,
1567        0,
1568        0,
1569        MatGetPetscMaps_Petsc,
1570        0,
1571 /*65*/ 0,
1572        0,
1573        0,
1574        0,
1575        0,
1576 /*70*/ MatGetRowMax_MPISBAIJ,
1577        0,
1578        0,
1579        0,
1580        0,
1581 /*75*/ 0,
1582        0,
1583        0,
1584        0,
1585        0,
1586 /*80*/ 0,
1587        0,
1588        0,
1589        0,
1590        MatLoad_MPISBAIJ,
1591 /*85*/ 0,
1592        0,
1593        0,
1594        0,
1595        0,
1596 /*90*/ 0,
1597        0,
1598        0,
1599        0,
1600        0,
1601 /*95*/ 0,
1602        0,
1603        0,
1604        0};
1605 
1606 
1607 EXTERN_C_BEGIN
1608 #undef __FUNCT__
1609 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1610 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1611 {
1612   PetscFunctionBegin;
1613   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1614   *iscopy = PETSC_FALSE;
1615   PetscFunctionReturn(0);
1616 }
1617 EXTERN_C_END
1618 
1619 EXTERN_C_BEGIN
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1622 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1623 {
1624   Mat_MPISBAIJ   *b;
1625   PetscErrorCode ierr;
1626   PetscInt       i,mbs,Mbs;
1627 
1628   PetscFunctionBegin;
1629   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1630 
1631   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1632   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1633   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1634   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1635   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1636   if (d_nnz) {
1637     for (i=0; i<B->m/bs; i++) {
1638       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1639     }
1640   }
1641   if (o_nnz) {
1642     for (i=0; i<B->m/bs; i++) {
1643       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1644     }
1645   }
1646   B->preallocated = PETSC_TRUE;
1647   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1648   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1649   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1650   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1651 
1652   b   = (Mat_MPISBAIJ*)B->data;
1653   mbs = B->m/bs;
1654   Mbs = B->M/bs;
1655   if (mbs*bs != B->m) {
1656     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by block size %D",B->m,bs);
1657   }
1658 
1659   b->bs  = bs;
1660   b->bs2 = bs*bs;
1661   b->mbs = mbs;
1662   b->nbs = mbs;
1663   b->Mbs = Mbs;
1664   b->Nbs = Mbs;
1665 
1666   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1667   b->rowners[0]    = 0;
1668   for (i=2; i<=b->size; i++) {
1669     b->rowners[i] += b->rowners[i-1];
1670   }
1671   b->rstart    = b->rowners[b->rank];
1672   b->rend      = b->rowners[b->rank+1];
1673   b->cstart    = b->rstart;
1674   b->cend      = b->rend;
1675   for (i=0; i<=b->size; i++) {
1676     b->rowners_bs[i] = b->rowners[i]*bs;
1677   }
1678   b->rstart_bs = b->rstart*bs;
1679   b->rend_bs   = b->rend*bs;
1680 
1681   b->cstart_bs = b->cstart*bs;
1682   b->cend_bs   = b->cend*bs;
1683 
1684   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1685   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1686   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1687   PetscLogObjectParent(B,b->A);
1688 
1689   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1690   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1691   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1692   PetscLogObjectParent(B,b->B);
1693 
1694   /* build cache for off array entries formed */
1695   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1696 
1697   PetscFunctionReturn(0);
1698 }
1699 EXTERN_C_END
1700 
1701 /*MC
1702    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1703    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1704 
1705    Options Database Keys:
1706 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1707 
1708   Level: beginner
1709 
1710 .seealso: MatCreateMPISBAIJ
1711 M*/
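/*
   Illustrative sketch (editorial addition): one way an application obtains a
   MATMPISBAIJ matrix is through the options database,

.vb
      Mat A;
      ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);        (run with -mat_type mpisbaij)
      ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve

   where comm, m, n, M, N, bs, d_nz, and o_nz stand for values the caller already
   has; see MatCreateMPISBAIJ() below for the single-call equivalent.
*/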
1712 
1713 EXTERN_C_BEGIN
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatCreate_MPISBAIJ"
1716 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1717 {
1718   Mat_MPISBAIJ   *b;
1719   PetscErrorCode ierr;
1720   PetscTruth     flg;
1721 
1722   PetscFunctionBegin;
1723 
1724   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1725   B->data = (void*)b;
1726   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1727   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1728 
1729   B->ops->destroy    = MatDestroy_MPISBAIJ;
1730   B->ops->view       = MatView_MPISBAIJ;
1731   B->mapping    = 0;
1732   B->factor     = 0;
1733   B->assembled  = PETSC_FALSE;
1734 
1735   B->insertmode = NOT_SET_VALUES;
1736   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1737   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1738 
1739   /* build local table of row and column ownerships */
1740   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1741   b->cowners    = b->rowners + b->size + 2;
1742   b->rowners_bs = b->cowners + b->size + 2;
1743   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1744 
1745   /* build cache for off array entries formed */
1746   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1747   b->donotstash  = PETSC_FALSE;
1748   b->colmap      = PETSC_NULL;
1749   b->garray      = PETSC_NULL;
1750   b->roworiented = PETSC_TRUE;
1751 
1752 #if defined(PETSC_USE_MAT_SINGLE)
1753   /* stuff for MatSetValues_XXX in single precision */
1754   b->setvalueslen     = 0;
1755   b->setvaluescopy    = PETSC_NULL;
1756 #endif
1757 
1758   /* stuff used in block assembly */
1759   b->barray       = 0;
1760 
1761   /* stuff used for matrix vector multiply */
1762   b->lvec         = 0;
1763   b->Mvctx        = 0;
1764   b->slvec0       = 0;
1765   b->slvec0b      = 0;
1766   b->slvec1       = 0;
1767   b->slvec1a      = 0;
1768   b->slvec1b      = 0;
1769   b->sMvctx       = 0;
1770 
1771   /* stuff for MatGetRow() */
1772   b->rowindices   = 0;
1773   b->rowvalues    = 0;
1774   b->getrowactive = PETSC_FALSE;
1775 
1776   /* hash table stuff */
1777   b->ht           = 0;
1778   b->hd           = 0;
1779   b->ht_size      = 0;
1780   b->ht_flag      = PETSC_FALSE;
1781   b->ht_fact      = 0;
1782   b->ht_total_ct  = 0;
1783   b->ht_insert_ct = 0;
1784 
1785   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1786   if (flg) {
1787     PetscReal fact = 1.39;
1788     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1789     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1790     if (fact <= 1.0) fact = 1.39;
1791     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1792     PetscLogInfo(0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact);
1793   }
1794   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1795                                      "MatStoreValues_MPISBAIJ",
1796                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1797   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1798                                      "MatRetrieveValues_MPISBAIJ",
1799                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1800   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1801                                      "MatGetDiagonalBlock_MPISBAIJ",
1802                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1803   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1804                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1805                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1806   B->symmetric                  = PETSC_TRUE;
1807   B->structurally_symmetric     = PETSC_TRUE;
1808   B->symmetric_set              = PETSC_TRUE;
1809   B->structurally_symmetric_set = PETSC_TRUE;
1810   PetscFunctionReturn(0);
1811 }
1812 EXTERN_C_END
1813 
1814 /*MC
1815    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1816 
1817    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1818    and MATMPISBAIJ otherwise.
1819 
1820    Options Database Keys:
1821 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1822 
1823   Level: beginner
1824 
1825 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1826 M*/
1827 
1828 EXTERN_C_BEGIN
1829 #undef __FUNCT__
1830 #define __FUNCT__ "MatCreate_SBAIJ"
1831 PetscErrorCode MatCreate_SBAIJ(Mat A)
1832 {
1833   PetscErrorCode ierr;
1834   PetscMPIInt    size;
1835 
1836   PetscFunctionBegin;
1837   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1838   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1839   if (size == 1) {
1840     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1841   } else {
1842     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1843   }
1844   PetscFunctionReturn(0);
1845 }
1846 EXTERN_C_END
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1850 /*@C
1851    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1852    the user should preallocate the matrix storage by setting the parameters
1853    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1854    performance can be increased by more than a factor of 50.
1855 
1856    Collective on Mat
1857 
1858    Input Parameters:
1859 +  A - the matrix
1860 .  bs   - size of block
1861 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1862            submatrix  (same for all local rows)
1863 .  d_nnz - array containing the number of block nonzeros in the various block rows
1864            in the upper triangular and diagonal part of the diagonal portion of the local
1865            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1866            for the diagonal entry even if it is zero.
1867 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1868            submatrix (same for all local rows).
1869 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1870            off-diagonal portion of the local submatrix (possibly different for
1871            each block row) or PETSC_NULL.
1872 
1873 
1874    Options Database Keys:
1875 +   -mat_no_unroll - uses code that does not unroll the loops in the
1876                      block calculations (much slower)
1877 -   -mat_block_size - size of the blocks to use
1878 
1879    Notes:
1880 
1881    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1882    then it must be used on all processors that share the object for that argument.
1883 
1884    Storage Information:
1885    For a square global matrix we define each processor's diagonal portion
1886    to be its local rows and the corresponding columns (a square submatrix);
1887    each processor's off-diagonal portion encompasses the remainder of the
1888    local matrix (a rectangular submatrix).
1889 
1890    The user can specify preallocated storage for the diagonal part of
1891    the local submatrix with either d_nz or d_nnz (not both).  Set
1892    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1893    memory allocation.  Likewise, specify preallocated storage for the
1894    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1895 
1896    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1897    the figure below we depict these three local rows and all columns (0-11).
1898 
1899 .vb
1900            0 1 2 3 4 5 6 7 8 9 10 11
1901           -------------------
1902    row 3  |  o o o d d d o o o o o o
1903    row 4  |  o o o d d d o o o o o o
1904    row 5  |  o o o d d d o o o o o o
1905           -------------------
1906 .ve
1907 
1908    Thus, any entries in the d locations are stored in the d (diagonal)
1909    submatrix, and any entries in the o locations are stored in the
1910    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1911    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1912 
1913    Now d_nz should indicate the number of block nonzeros per block row in the upper triangular
1914    plus the diagonal part of the d matrix,
1915    and o_nz should indicate the number of block nonzeros per block row in the o matrix.
1916    In general, for PDE problems in which most nonzeros are near the diagonal,
1917    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1918    or you will get TERRIBLE performance; see the users' manual chapter on
1919    matrices.
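
   Example (illustrative; the block counts are hypothetical): if this process owns
   two block rows with 2 and 3 blocks in the upper triangular part of d, and 1 and 0
   blocks in o, one preallocates with

.vb
      PetscInt d_nnz[2] = {2,3},o_nnz[2] = {1,0};
      ierr = MatMPISBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve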
1920 
1921    Level: intermediate
1922 
1923 .keywords: matrix, block, aij, compressed row, sparse, parallel
1924 
1925 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1926 @*/
1927 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1928 {
1929   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1930 
1931   PetscFunctionBegin;
1932   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1933   if (f) {
1934     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1935   }
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatCreateMPISBAIJ"
1941 /*@C
1942    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1943    (block compressed row).  For good matrix assembly performance
1944    the user should preallocate the matrix storage by setting the parameters
1945    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1946    performance can be increased by more than a factor of 50.
1947 
1948    Collective on MPI_Comm
1949 
1950    Input Parameters:
1951 +  comm - MPI communicator
1952 .  bs   - size of block
1953 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1954            This value should be the same as the local size used in creating the
1955            y vector for the matrix-vector product y = Ax.
1956 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1957            This value should be the same as the local size used in creating the
1958            x vector for the matrix-vector product y = Ax.
1959 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1960 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1961 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1962            submatrix  (same for all local rows)
1963 .  d_nnz - array containing the number of block nonzeros in the various block rows
1964            in the upper triangular portion of the diagonal portion of the local
1965            submatrix (possibly different for each block row) or PETSC_NULL.
1966            You must leave room for the diagonal entry even if it is zero.
1967 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1968            submatrix (same for all local rows).
1969 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1970            off-diagonal portion of the local submatrix (possibly different for
1971            each block row) or PETSC_NULL.
1972 
1973    Output Parameter:
1974 .  A - the matrix
1975 
1976    Options Database Keys:
1977 +   -mat_no_unroll - uses code that does not unroll the loops in the
1978                      block calculations (much slower)
1979 .   -mat_block_size - size of the blocks to use
1980 -   -mat_mpi - use the parallel matrix data structures even on one processor
1981                (defaults to using SeqBAIJ format on one processor)
1982 
1983    Notes:
1984    The user MUST specify either the local or global matrix dimensions
1985    (possibly both).
1986 
1987    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1988    then it must be used on all processors that share the object for that argument.
1989 
1990    Storage Information:
1991    For a square global matrix we define each processor's diagonal portion
1992    to be its local rows and the corresponding columns (a square submatrix);
1993    each processor's off-diagonal portion encompasses the remainder of the
1994    local matrix (a rectangular submatrix).
1995 
1996    The user can specify preallocated storage for the diagonal part of
1997    the local submatrix with either d_nz or d_nnz (not both).  Set
1998    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1999    memory allocation.  Likewise, specify preallocated storage for the
2000    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2001 
2002    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2003    the figure below we depict these three local rows and all columns (0-11).
2004 
2005 .vb
2006            0 1 2 3 4 5 6 7 8 9 10 11
2007           -------------------
2008    row 3  |  o o o d d d o o o o o o
2009    row 4  |  o o o d d d o o o o o o
2010    row 5  |  o o o d d d o o o o o o
2011           -------------------
2012 .ve
2013 
2014    Thus, any entries in the d locations are stored in the d (diagonal)
2015    submatrix, and any entries in the o locations are stored in the
2016    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2017    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2018 
2019    Now d_nz should indicate the number of block nonzeros per block row in the upper triangular
2020    plus the diagonal part of the d matrix,
2021    and o_nz should indicate the number of block nonzeros per block row in the o matrix.
2022    In general, for PDE problems in which most nonzeros are near the diagonal,
2023    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2024    or you will get TERRIBLE performance; see the users' manual chapter on
2025    matrices.
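
   Example (illustrative; the sizes and fill counts are hypothetical): a square
   matrix with block size 2 and constant preallocation per block row, letting
   PETSc choose the local sizes:

.vb
      Mat A;
      ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve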
2026 
2027    Level: intermediate
2028 
2029 .keywords: matrix, block, aij, compressed row, sparse, parallel
2030 
2031 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2032 @*/
2033 
2034 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2035 {
2036   PetscErrorCode ierr;
2037   PetscMPIInt    size;
2038 
2039   PetscFunctionBegin;
2040   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2041   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2042   if (size > 1) {
2043     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2044     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2045   } else {
2046     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2047     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2048   }
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 
2053 #undef __FUNCT__
2054 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2055 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2056 {
2057   Mat            mat;
2058   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2059   PetscErrorCode ierr;
2060   PetscInt       len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs;
2061   PetscScalar    *array;
2062 
2063   PetscFunctionBegin;
2064   *newmat       = 0;
2065   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2066   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2067   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2068 
2069   mat->factor       = matin->factor;
2070   mat->preallocated = PETSC_TRUE;
2071   mat->assembled    = PETSC_TRUE;
2072   mat->insertmode   = NOT_SET_VALUES;
2073 
2074   a = (Mat_MPISBAIJ*)mat->data;
2075   a->bs  = oldmat->bs;
2076   a->bs2 = oldmat->bs2;
2077   a->mbs = oldmat->mbs;
2078   a->nbs = oldmat->nbs;
2079   a->Mbs = oldmat->Mbs;
2080   a->Nbs = oldmat->Nbs;
2081 
2082   a->rstart       = oldmat->rstart;
2083   a->rend         = oldmat->rend;
2084   a->cstart       = oldmat->cstart;
2085   a->cend         = oldmat->cend;
2086   a->size         = oldmat->size;
2087   a->rank         = oldmat->rank;
2088   a->donotstash   = oldmat->donotstash;
2089   a->roworiented  = oldmat->roworiented;
2090   a->rowindices   = 0;
2091   a->rowvalues    = 0;
2092   a->getrowactive = PETSC_FALSE;
2093   a->barray       = 0;
2094   a->rstart_bs    = oldmat->rstart_bs;
2095   a->rend_bs      = oldmat->rend_bs;
2096   a->cstart_bs    = oldmat->cstart_bs;
2097   a->cend_bs      = oldmat->cend_bs;
2098 
2099   /* hash table stuff */
2100   a->ht           = 0;
2101   a->hd           = 0;
2102   a->ht_size      = 0;
2103   a->ht_flag      = oldmat->ht_flag;
2104   a->ht_fact      = oldmat->ht_fact;
2105   a->ht_total_ct  = 0;
2106   a->ht_insert_ct = 0;
2107 
2108   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2109   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2110   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2111   if (oldmat->colmap) {
2112 #if defined (PETSC_USE_CTABLE)
2113     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2114 #else
2115     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2116     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
2117     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2118 #endif
2119   } else a->colmap = 0;
2120 
2121   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2122     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2123     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2124     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2125   } else a->garray = 0;
2126 
2127   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2128   PetscLogObjectParent(mat,a->lvec);
2129   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2130   PetscLogObjectParent(mat,a->Mvctx);
2131 
2132   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2133   PetscLogObjectParent(mat,a->slvec0);
2134   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2135   PetscLogObjectParent(mat,a->slvec1);
2136 
2137   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2138   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2139   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2140   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2141   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2142   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2143   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2144   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2145   PetscLogObjectParent(mat,a->slvec0);
2146   PetscLogObjectParent(mat,a->slvec1);
2147   PetscLogObjectParent(mat,a->slvec0b);
2148   PetscLogObjectParent(mat,a->slvec1a);
2149   PetscLogObjectParent(mat,a->slvec1b);
2150 
2151   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2152   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2153   a->sMvctx = oldmat->sMvctx;
2154   PetscLogObjectParent(mat,a->sMvctx);
2155 
2156   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2157   PetscLogObjectParent(mat,a->A);
2158   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2159   PetscLogObjectParent(mat,a->B);
2160   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); /* copy composed functions from the original matrix */
2161   *newmat = mat;
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 #include "petscsys.h"
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatLoad_MPISBAIJ"
2169 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2170 {
2171   Mat            A;
2172   PetscErrorCode ierr;
2173   PetscInt       i,nz,j,rstart,rend,fd;
2174   PetscScalar    *vals,*buf;
2175   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2176   MPI_Status     status;
2177   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2178   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2179   PetscInt       *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2180   PetscInt       bs=1,Mbs,mbs,extra_rows;
2181   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2182   PetscInt       dcount,kmax,k,nzcount,tmp;
2183 
2184   PetscFunctionBegin;
2185   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2186 
2187   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2188   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2189   if (!rank) {
2190     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2191     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2192     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2193     if (header[3] < 0) {
2194       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2195     }
2196   }
2197 
2198   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2199   M = header[1]; N = header[2];
2200 
2201   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2202 
2203   /*
2204      This code adds extra rows to make sure the number of rows is
2205      divisible by the blocksize
2206   */
2207   Mbs        = M/bs;
2208   extra_rows = bs - M + bs*(Mbs);
2209   if (extra_rows == bs) extra_rows = 0;
2210   else                  Mbs++;
2211   if (extra_rows && !rank) {
2212     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2213   }
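  /*
     Worked example (editorial addition): with M = 10 and bs = 4, Mbs = 10/4 = 2 and
     extra_rows = 4 - 10 + 4*2 = 2, so Mbs is bumped to 3 and two padding rows are
     appended; below they each receive a single unit entry on the diagonal.
  */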
2214 
2215   /* determine ownership of all rows */
2216   mbs        = Mbs/size + ((Mbs % size) > rank);
2217   m          = mbs*bs;
2218   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2219   browners   = rowners + size + 1;
2220   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2221   rowners[0] = 0;
2222   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2223   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2224   rstart = rowners[rank];
2225   rend   = rowners[rank+1];
2226 
2227   /* distribute row lengths to all processors */
2228   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2229   if (!rank) {
2230     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2231     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2232     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2233     ierr = PetscMalloc(size*sizeof(PetscInt),&sndcounts);CHKERRQ(ierr);
2234     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2235     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2236     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2237   } else {
2238     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2239   }
2240 
2241   if (!rank) {   /* procs[0] */
2242     /* calculate the number of nonzeros on each processor */
2243     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2244     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2245     for (i=0; i<size; i++) {
2246       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2247         procsnz[i] += rowlengths[j];
2248       }
2249     }
2250     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2251 
2252     /* determine max buffer needed and allocate it */
2253     maxnz = 0;
2254     for (i=0; i<size; i++) {
2255       maxnz = PetscMax(maxnz,procsnz[i]);
2256     }
2257     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2258 
2259     /* read in my part of the matrix column indices  */
2260     nz     = procsnz[0];
2261     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2262     mycols = ibuf;
2263     if (size == 1)  nz -= extra_rows;
2264     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2265     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2266 
2267     /* read in the column indices for all other processors (except the last) and ship them off */
2268     for (i=1; i<size-1; i++) {
2269       nz   = procsnz[i];
2270       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2271       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2272     }
2273     /* read in the stuff for the last proc */
2274     if (size != 1) {
2275       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2276       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2277       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2278       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2279     }
2280     ierr = PetscFree(cols);CHKERRQ(ierr);
2281   } else {  /* procs[i], i>0 */
2282     /* determine buffer space needed for message */
2283     nz = 0;
2284     for (i=0; i<m; i++) {
2285       nz += locrowlens[i];
2286     }
2287     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2288     mycols = ibuf;
2289     /* receive message of column indices */
2290     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2291     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2292     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2293   }
2294 
2295   /* loop over local rows, determining number of off diagonal entries */
2296   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2297   odlens   = dlens + (rend-rstart);
2298   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2299   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2300   masked1  = mask    + Mbs;
2301   masked2  = masked1 + Mbs;
2302   rowcount = 0; nzcount = 0;
2303   for (i=0; i<mbs; i++) {
2304     dcount  = 0;
2305     odcount = 0;
2306     for (j=0; j<bs; j++) {
2307       kmax = locrowlens[rowcount];
2308       for (k=0; k<kmax; k++) {
2309         tmp = mycols[nzcount++]/bs; /* block col. index */
2310         if (!mask[tmp]) {
2311           mask[tmp] = 1;
2312           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2313           else masked1[dcount++] = tmp; /* entry in diag portion */
2314         }
2315       }
2316       rowcount++;
2317     }
2318 
2319     dlens[i]  = dcount;  /* d_nzz[i] */
2320     odlens[i] = odcount; /* o_nzz[i] */
2321 
2322     /* zero out the mask elements we set */
2323     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2324     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2325   }
2326 
2327   /* create our matrix */
2328   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2329   ierr = MatSetType(A,type);CHKERRQ(ierr);
2330   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2331   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2332 
2333   if (!rank) {
2334     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2335     /* read in my part of the matrix numerical values  */
2336     nz = procsnz[0];
2337     vals = buf;
2338     mycols = ibuf;
2339     if (size == 1)  nz -= extra_rows;
2340     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2341     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2342 
2343     /* insert into matrix */
2344     jj      = rstart*bs;
2345     for (i=0; i<m; i++) {
2346       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2347       mycols += locrowlens[i];
2348       vals   += locrowlens[i];
2349       jj++;
2350     }
2351 
2352     /* read in the other processors' values (except the last one) and ship them out */
2353     for (i=1; i<size-1; i++) {
2354       nz   = procsnz[i];
2355       vals = buf;
2356       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2357       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2358     }
2359     /* the last proc */
2360     if (size != 1){
2361       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2362       vals = buf;
2363       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2364       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2365       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2366     }
2367     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2368 
2369   } else {
2370     /* receive numeric values */
2371     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2372 
2373     /* receive message of values */
2374     vals   = buf;
2375     mycols = ibuf;
2376     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2377     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2378     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2379 
2380     /* insert into matrix */
2381     jj      = rstart*bs;
2382     for (i=0; i<m; i++) {
2383       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2384       mycols += locrowlens[i];
2385       vals   += locrowlens[i];
2386       jj++;
2387     }
2388   }
2389 
2390   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2391   ierr = PetscFree(buf);CHKERRQ(ierr);
2392   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2393   ierr = PetscFree(rowners);CHKERRQ(ierr);
2394   ierr = PetscFree(dlens);CHKERRQ(ierr);
2395   ierr = PetscFree(mask);CHKERRQ(ierr);
2396   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2397   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2398   *newmat = A;
2399   PetscFunctionReturn(0);
2400 }
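/*
   Illustrative usage sketch (editorial addition; the viewer-mode constant is assumed
   to match this PETSc version):

.vb
      PetscViewer fd;
      Mat         A;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_FILE_RDONLY,&fd);CHKERRQ(ierr);
      ierr = MatLoad(fd,MATMPISBAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(fd);CHKERRQ(ierr);
.ve

   The block size is taken from -matload_block_size (default 1); when the global
   size is not divisible by it, rows are padded as described above.
*/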
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2404 /*@
2405    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2406 
2407    Input Parameters:
2408 +  mat  - the matrix
2409 -  fact - factor
2410 
2411    Collective on Mat
2412 
2413    Level: advanced
2414 
2415   Notes:
2416    This can also be set by the command line option: -mat_use_hash_table fact
2417 
2418 .keywords: matrix, hashtable, factor, HT
2419 
2420 .seealso: MatSetOption()
2421 @*/
2422 PetscErrorCode MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2423 {
2424   PetscFunctionBegin;
2425   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
2426   /* PetscFunctionReturn(0); */
2427 }
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2431 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2432 {
2433   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2434   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2435   PetscReal      atmp;
2436   PetscReal      *work,*svalues,*rvalues;
2437   PetscErrorCode ierr;
2438   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2439   PetscMPIInt    rank,size;
2440   PetscInt       *rowners_bs,dest,count,source;
2441   PetscScalar    *va;
2442   MatScalar      *ba;
2443   MPI_Status     stat;
2444 
2445   PetscFunctionBegin;
2446   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2447   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2448 
2449   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2450   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2451 
2452   bs   = a->bs;
2453   mbs  = a->mbs;
2454   Mbs  = a->Mbs;
2455   ba   = b->a;
2456   bi   = b->i;
2457   bj   = b->j;
2458 
2459   /* find ownerships */
2460   rowners_bs = a->rowners_bs;
2461 
2462   /* each proc creates an array to be distributed */
2463   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2464   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2465 
2466   /* row_max for B */
2467   if (rank != size-1){
2468     for (i=0; i<mbs; i++) {
2469       ncols = bi[1] - bi[0]; bi++;
2470       brow  = bs*i;
2471       for (j=0; j<ncols; j++){
2472         bcol = bs*(*bj);
2473         for (kcol=0; kcol<bs; kcol++){
2474           col = bcol + kcol;                 /* local col index */
2475           col += rowners_bs[rank+1];      /* global col index */
2476           for (krow=0; krow<bs; krow++){
2477             atmp = PetscAbsScalar(*ba); ba++;
2478             row = brow + krow;    /* local row index */
2479             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2480             if (work[col] < atmp) work[col] = atmp;
2481           }
2482         }
2483         bj++;
2484       }
2485     }
2486 
2487     /* send values to its owners */
2488     for (dest=rank+1; dest<size; dest++){
2489       svalues = work + rowners_bs[dest];
2490       count   = rowners_bs[dest+1]-rowners_bs[dest];
2491       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2492     }
2493   }
2494 
2495   /* receive values */
2496   if (rank){
2497     rvalues = work;
2498     count   = rowners_bs[rank+1]-rowners_bs[rank];
2499     for (source=0; source<rank; source++){
2500       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2501       /* process values */
2502       for (i=0; i<count; i++){
2503         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2504       }
2505     }
2506   }
2507 
2508   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2509   ierr = PetscFree(work);CHKERRQ(ierr);
2510   PetscFunctionReturn(0);
2511 }
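/*
   Editorial note on the routine above: because only the upper triangular halo is
   stored, each process first takes row maxima of its diagonal block, then folds in
   its B entries both row-wise (into va) and column-wise (into work), and finally
   ships the column maxima to the higher-rank processes that own those rows; each
   receiver merges one message per lower-rank source in the MPI_Recv loop.
*/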
2512 
2513 #undef __FUNCT__
2514 #define __FUNCT__ "MatRelax_MPISBAIJ"
2515 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2516 {
2517   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2518   PetscErrorCode ierr;
2519   PetscInt       mbs=mat->mbs,bs=mat->bs;
2520   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2521   Vec            bb1;
2522 
2523   PetscFunctionBegin;
2524   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2525   if (bs > 1)
2526     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2527 
2528   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2529     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2530       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2531       its--;
2532     }
2533 
2534     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2535     while (its--){
2536 
2537       /* lower triangular part: slvec0b = B^T*xx (both halves of slvec0 are negated below) */
2538       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2539 
2540       /* copy xx into slvec0a */
2541       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2542       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2543       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2544       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2545 
2546       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2547 
2548       /* copy bb into slvec1a */
2549       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2550       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2551       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2552       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2553 
2554       /* set slvec1b = 0 */
2555       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2556 
2557       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2558       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2559       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2560       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2561 
2562       /* upper triangular part: bb1 = slvec1a + B*slvec1b, i.e. bb minus the full off-diagonal action */
2563       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2564 
2565       /* local diagonal sweep */
2566       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2567     }
2568     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2569   } else {
2570     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2571   }
2572   PetscFunctionReturn(0);
2573 }
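/*
   Editorial note on the sweep above: only the upper triangular halo B is stored,
   so the action of the full off-diagonal part of the symmetric matrix is assembled as

       bb1 = bb - (B^T*x)_(off-process) - B*x_ghost

   using a single forward scatter of slvec0 = -[x ; B^T*x] into slvec1 = [bb ; 0];
   the relaxation itself is then applied with the local diagonal block A.
*/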
2574 
2575 #undef __FUNCT__
2576 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2577 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2578 {
2579   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2580   PetscErrorCode ierr;
2581   PetscScalar    mone=-1.0;
2582   Vec            lvec1,bb1;
2583 
2584   PetscFunctionBegin;
2585   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2586   if (mat->bs > 1)
2587     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2588 
2589   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2590     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2591       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2592       its--;
2593     }
2594 
2595     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2596     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2597     while (its--){
2598       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2599 
2600       /* lower diagonal part: bb1 = bb - B^T*xx */
2601       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2602       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2603 
2604       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2605       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2606       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2607 
2608       /* upper diagonal part: bb1 = bb1 - B*x */
2609       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2610       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2611 
2612       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2613 
2614       /* diagonal sweep */
2615       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2616     }
2617     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2618     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2619   } else {
2620     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2621   }
2622   PetscFunctionReturn(0);
2623 }
2624 
2625