xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision e005ede52eafe2fed2813abb7a7eb3df04d5f886)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
3 #include "mpisbaij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
8 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,int,IS[],int);
10 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
11 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
13 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
16 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
25    not exist; the macros below alias the ..._MatScalar() names to the regular functions.
26    Otherwise ..._MatScalar() takes matrix elements already in single precision and inserts
27    them directly into the single precision data structures, while MatSetValuesBlocked_MPISBAIJ()
28    first converts the entries to single precision and then calls ..._MatScalar() to store them.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
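/*
   Usage sketch (illustration only, not compiled here): MatStoreValues()/MatRetrieveValues()
   let an application save the numerical values of a matrix whose nonzero pattern is frozen
   and restore them later, e.g. when rebuilding a preconditioner matrix with identical
   structure. A minimal sketch, assuming J was created and preallocated elsewhere; the
   MatStoreValues() manual page asks that new nonzero locations be disallowed first:

     ierr = MatSetOption(J,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(J);CHKERRQ(ierr);
       ... overwrite the values of J, use it, then recover the saved values ...
     ierr = MatRetrieveValues(J);CHKERRQ(ierr);
*/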
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (int*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
153 #ifndef MatSetValues_SeqSBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (int*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
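/*
   Both macros above use the same insertion scheme: a coarse binary search narrows the
   window of sorted block-column indices to at most three entries, a short linear scan
   finishes the lookup, and on a miss the tail of the row is shifted up one slot (with a
   CHUNKSIZE-block reallocation when the row is full). A minimal standalone sketch of the
   search/insert step on a plain sorted int array; the function name is ours and memmove()
   (from <string.h>) stands in for the PetscMemcpy() shifting loop:
*/
#if 0
static int insert_sorted(int *row,int *nrow,int rmax,int bcol)
{
  int low = 0,high = *nrow,t,i;
  while (high-low > 3) {                /* coarse binary search */
    t = (low+high)/2;
    if (row[t] > bcol) high = t;
    else               low  = t;
  }
  for (i=low; i<high; i++) {            /* finish with a linear scan */
    if (row[i] == bcol) return i;       /* column already present */
    if (row[i] > bcol)  break;          /* i is the insertion point */
  }
  if (*nrow >= rmax) return -1;         /* row full: caller must enlarge it */
  memmove(row+i+1,row+i,(*nrow-i)*sizeof(int));
  row[i] = bcol; (*nrow)++;
  return i;
}
#endif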
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
237   PetscErrorCode ierr;
238   int i,N = m*n;
239   MatScalar    *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   int i,N = m*n*b->bs2;
263   MatScalar   *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
281 PetscErrorCode MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
282 {
283   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
284   PetscErrorCode ierr;
285   int i,N = m*n;
286   MatScalar   *vsingle;
287 
288   PetscFunctionBegin;
289   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
290   /* PetscFunctionReturn(0); */
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
295 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
296 {
297   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
298   PetscErrorCode ierr;
299   int i,N = m*n*b->bs2;
300   MatScalar   *vsingle;
301 
302   PetscFunctionBegin;
303   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
304   /* PetscFunctionReturn(0); */
305 }
306 #endif
307 
308 /* Only add/insert entries a(i,j) with i<=j (block indices).
309    Any a(i,j) with i>j input by the user is ignored.
310 */
311 #undef __FUNCT__
312 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
313 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
314 {
315   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
316   MatScalar    value;
317   PetscTruth   roworiented = baij->roworiented;
318   PetscErrorCode ierr;
319   int i,j,row,col;
320   int          rstart_orig=baij->rstart_bs;
321   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
322   int          cend_orig=baij->cend_bs,bs=baij->bs;
323 
324   /* Some Variables required in the macro */
325   Mat          A = baij->A;
326   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
327   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
328   MatScalar    *aa=a->a;
329 
330   Mat          B = baij->B;
331   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
332   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
333   MatScalar    *ba=b->a;
334 
335   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
336   int          low,high,t,ridx,cidx,bs2=a->bs2;
337   MatScalar    *ap,*bap;
338 
339   /* for stash */
340   int          n_loc, *in_loc=0;
341   MatScalar    *v_loc=0;
342 
343   PetscFunctionBegin;
344 
345   if (!baij->donotstash) {
346     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
347     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
348   }
349 
350   for (i=0; i<m; i++) {
351     if (im[i] < 0) continue;
352 #if defined(PETSC_USE_BOPT_g)
353     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
354 #endif
355     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
356       row = im[i] - rstart_orig;              /* local row index */
357       for (j=0; j<n; j++) {
358         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
359         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
360           col = in[j] - cstart_orig;          /* local col index */
361           brow = row/bs; bcol = col/bs;
362           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
363           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
364           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
365           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
366         } else if (in[j] < 0) continue;
367 #if defined(PETSC_USE_BOPT_g)
368         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
369 #endif
370         else {  /* off-diag entry (B) */
371           if (mat->was_assembled) {
372             if (!baij->colmap) {
373               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
374             }
375 #if defined (PETSC_USE_CTABLE)
376             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
377             col  = col - 1;
378 #else
379             col = baij->colmap[in[j]/bs] - 1;
380 #endif
381             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
382               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
383               col =  in[j];
384               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
385               B = baij->B;
386               b = (Mat_SeqBAIJ*)(B)->data;
387               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
388               ba=b->a;
389             } else col += in[j]%bs;
390           } else col = in[j];
391           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
392           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
393           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
394         }
395       }
396     } else {  /* off processor entry */
397       if (!baij->donotstash) {
398         n_loc = 0;
399         for (j=0; j<n; j++){
400           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
401           in_loc[n_loc] = in[j];
402           if (roworiented) {
403             v_loc[n_loc] = v[i*n+j];
404           } else {
405             v_loc[n_loc] = v[j*m+i];
406           }
407           n_loc++;
408         }
409         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
410       }
411     }
412   }
413 
414   if (!baij->donotstash) {
415     ierr = PetscFree(in_loc);CHKERRQ(ierr);
416     ierr = PetscFree(v_loc);CHKERRQ(ierr);
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
423 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
424 {
425   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
426   const MatScalar *value;
427   MatScalar       *barray=baij->barray;
428   PetscTruth      roworiented = baij->roworiented;
429   PetscErrorCode ierr;
430   int i,j,ii,jj,row,col,rstart=baij->rstart;
431   int             rend=baij->rend,cstart=baij->cstart,stepval;
432   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
433 
434   PetscFunctionBegin;
435   if (!barray) {
436     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
437     baij->barray = barray;
438   }
439 
440   if (roworiented) {
441     stepval = (n-1)*bs;
442   } else {
443     stepval = (m-1)*bs;
444   }
445   for (i=0; i<m; i++) {
446     if (im[i] < 0) continue;
447 #if defined(PETSC_USE_BOPT_g)
448     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
449 #endif
450     if (im[i] >= rstart && im[i] < rend) {
451       row = im[i] - rstart;
452       for (j=0; j<n; j++) {
453         /* If NumCol = 1 then a copy is not required */
454         if ((roworiented) && (n == 1)) {
455           barray = (MatScalar*) v + i*bs2;
456         } else if((!roworiented) && (m == 1)) {
457           barray = (MatScalar*) v + j*bs2;
458         } else { /* Here a copy is required */
459           if (roworiented) {
460             value = v + i*(stepval+bs)*bs + j*bs;
461           } else {
462             value = v + j*(stepval+bs)*bs + i*bs;
463           }
464           for (ii=0; ii<bs; ii++,value+=stepval) {
465             for (jj=0; jj<bs; jj++) {
466               *barray++  = *value++;
467             }
468           }
469           barray -=bs2;
470         }
471 
472         if (in[j] >= cstart && in[j] < cend){
473           col  = in[j] - cstart;
474           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
475         }
476         else if (in[j] < 0) continue;
477 #if defined(PETSC_USE_BOPT_g)
478         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
479 #endif
480         else {
481           if (mat->was_assembled) {
482             if (!baij->colmap) {
483               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
484             }
485 
486 #if defined(PETSC_USE_BOPT_g)
487 #if defined (PETSC_USE_CTABLE)
488             { int data;
489               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
490               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
491             }
492 #else
493             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
494 #endif
495 #endif
496 #if defined (PETSC_USE_CTABLE)
497             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
498             col  = (col - 1)/bs;
499 #else
500             col = (baij->colmap[in[j]] - 1)/bs;
501 #endif
502             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
503               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
504               col =  in[j];
505             }
506           }
507           else col = in[j];
508           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
509         }
510       }
511     } else {
512       if (!baij->donotstash) {
513         if (roworiented) {
514           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
515         } else {
516           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
517         }
518       }
519     }
520   }
521   PetscFunctionReturn(0);
522 }
523 
524 #define HASH_KEY 0.6180339887
525 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
526 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
527 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
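/*
   HASH() is multiplicative hashing: multiply the key by the golden-ratio conjugate
   (HASH_KEY) and use the fractional part, scaled by the table size, as the slot index,
   which spreads consecutive keys well across the table (Knuth's method). A standalone
   illustration; the function name is ours:
*/
#if 0
static int hash_slot(int size,int key)
{
  PetscReal tmp = key*HASH_KEY;          /* key times (sqrt(5)-1)/2 */
  return (int)(size*(tmp - (int)tmp));   /* table size times the fractional part */
}
#endif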
528 #undef __FUNCT__
529 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
530 PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
531 {
532   PetscFunctionBegin;
533   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
534   /* PetscFunctionReturn(0); */
535 }
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
539 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
540 {
541   PetscFunctionBegin;
542   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
543   /* PetscFunctionReturn(0); */
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_MPISBAIJ"
548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
549 {
550   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
551   PetscErrorCode ierr;
552   int          bs=baij->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
553   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
554 
555   PetscFunctionBegin;
556   for (i=0; i<m; i++) {
557     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
558     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
559     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
560       row = idxm[i] - bsrstart;
561       for (j=0; j<n; j++) {
562         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
563         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
564         if (idxn[j] >= bscstart && idxn[j] < bscend){
565           col = idxn[j] - bscstart;
566           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567         } else {
568           if (!baij->colmap) {
569             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
570           }
571 #if defined (PETSC_USE_CTABLE)
572           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
573           data --;
574 #else
575           data = baij->colmap[idxn[j]/bs]-1;
576 #endif
577           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
578           else {
579             col  = data + idxn[j]%bs;
580             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581           }
582         }
583       }
584     } else {
585       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
586     }
587   }
588   PetscFunctionReturn(0);
589 }
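/*
   MatGetValues_MPISBAIJ() only serves rows owned by the calling process (an off-process
   row raises PETSC_ERR_SUP above). A minimal usage sketch, assuming mat is assembled:

     int         rstart,rend,row,col;
     PetscScalar value;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     row  = rstart; col = rstart;
     ierr = MatGetValues(mat,1,&row,1,&col,&value);CHKERRQ(ierr);
*/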
590 
591 #undef __FUNCT__
592 #define __FUNCT__ "MatNorm_MPISBAIJ"
593 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
594 {
595   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
596   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
597   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
598   PetscErrorCode ierr;
599   PetscReal  sum[2],*lnorm2;
600 
601   PetscFunctionBegin;
602   if (baij->size == 1) {
603     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
604   } else {
605     if (type == NORM_FROBENIUS) {
606       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
607       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
608       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
609       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
610       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
611       /*
612       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
613       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
614       */
615       ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
616       /*
617       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
618       PetscSynchronizedFlush(mat->comm); */
619 
620       *norm = sqrt(sum[0] + 2*sum[1]); /* each off-diagonal block is stored once but occurs twice in the symmetric matrix */
621       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
622     } else {
623       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
624     }
625   }
626   PetscFunctionReturn(0);
627 }
628 
629 /*
630   Creates and fills the hash table.
631   This table is created only once.
632   If new entries need to be added to the matrix
633   then the hash table has to be destroyed and
634   recreated.
635 */
636 #undef __FUNCT__
637 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
638 PetscErrorCode MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
639 {
640   PetscFunctionBegin;
641   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
642   /* PetscFunctionReturn(0); */
643 }
644 
645 #undef __FUNCT__
646 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
647 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
648 {
649   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
650   PetscErrorCode ierr;
651   int nstash,reallocs;
652   InsertMode  addv;
653 
654   PetscFunctionBegin;
655   if (baij->donotstash) {
656     PetscFunctionReturn(0);
657   }
658 
659   /* make sure all processors are either in INSERTMODE or ADDMODE */
660   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
661   if (addv == (ADD_VALUES|INSERT_VALUES)) {
662     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
663   }
664   mat->insertmode = addv; /* in case this processor had no cache */
665 
666   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
667   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
668   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
669   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
670   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
671   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
672   PetscFunctionReturn(0);
673 }
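/*
   The stash machinery above only matters when a process sets entries it does not own.
   A minimal sketch of the calling pattern this routine supports (assuming mat is a
   preallocated MATMPISBAIJ and only upper-triangular entries i <= j are set, as
   MatSetValues_MPISBAIJ requires):

     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ...
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Off-process values sit in mat->stash/mat->bstash between the two assembly calls,
   which is why both calls are collective on the matrix's communicator.
*/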
674 
675 #undef __FUNCT__
676 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
677 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
678 {
679   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
680   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
681   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
682   PetscErrorCode ierr;
683   int         i,j,rstart,ncols,n,flg,bs2=baij->bs2;
684   int         *row,*col,other_disassembled;
685   PetscTruth  r1,r2,r3;
686   MatScalar   *val;
687   InsertMode  addv = mat->insertmode;
688 
689   PetscFunctionBegin;
690 
691   if (!baij->donotstash) {
692     while (1) {
693       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
694       /*
695       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
696       PetscSynchronizedFlush(mat->comm);
697       */
698       if (!flg) break;
699 
700       for (i=0; i<n;) {
701         /* Now identify the consecutive vals belonging to the same row */
702         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
703         if (j < n) ncols = j-i;
704         else       ncols = n-i;
705         /* Now assemble all these values with a single function call */
706         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
707         i = j;
708       }
709     }
710     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
711     /* Now process the block-stash. Since the values are stashed column-oriented,
712        set the roworiented flag to column oriented, and after MatSetValues()
713        restore the original flags */
714     r1 = baij->roworiented;
715     r2 = a->roworiented;
716     r3 = b->roworiented;
717     baij->roworiented = PETSC_FALSE;
718     a->roworiented    = PETSC_FALSE;
719     b->roworiented    = PETSC_FALSE;
720     while (1) {
721       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
722       if (!flg) break;
723 
724       for (i=0; i<n;) {
725         /* Now identify the consecutive vals belonging to the same row */
726         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
727         if (j < n) ncols = j-i;
728         else       ncols = n-i;
729         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
730         i = j;
731       }
732     }
733     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
734     baij->roworiented = r1;
735     a->roworiented    = r2;
736     b->roworiented    = r3;
737   }
738 
739   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
740   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
741 
742   /* determine whether any processor has disassembled; if so, we must
743      also disassemble ourselves so that we may reassemble. */
744   /*
745      if nonzero structure of submatrix B cannot change then we know that
746      no processor disassembled thus we can skip this stuff
747   */
748   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
749     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
750     if (mat->was_assembled && !other_disassembled) {
751       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
752     }
753   }
754 
755   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
756     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
757   }
758   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
759   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
760 
761 #if defined(PETSC_USE_BOPT_g)
762   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
763     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
764     baij->ht_total_ct  = 0;
765     baij->ht_insert_ct = 0;
766   }
767 #endif
768   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
769     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
770     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
771     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
772   }
773 
774   if (baij->rowvalues) {
775     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
776     baij->rowvalues = 0;
777   }
778 
779   PetscFunctionReturn(0);
780 }
781 
782 #undef __FUNCT__
783 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
784 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
785 {
786   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
787   PetscErrorCode ierr;
788   int bs = baij->bs,size = baij->size,rank = baij->rank;
789   PetscTruth        iascii,isdraw;
790   PetscViewer       sviewer;
791   PetscViewerFormat format;
792 
793   PetscFunctionBegin;
794   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
795   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
796   if (iascii) {
797     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
798     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
799       MatInfo info;
800       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
801       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
802       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
803               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
804               baij->bs,(int)info.memory);CHKERRQ(ierr);
805       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
806       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
807       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
808       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
809       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
810       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
811       PetscFunctionReturn(0);
812     } else if (format == PETSC_VIEWER_ASCII_INFO) {
813       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
814       PetscFunctionReturn(0);
815     }
816   }
817 
818   if (isdraw) {
819     PetscDraw       draw;
820     PetscTruth isnull;
821     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
822     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
823   }
824 
825   if (size == 1) {
826     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
827     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
828   } else {
829     /* assemble the entire matrix onto first processor. */
830     Mat         A;
831     Mat_SeqSBAIJ *Aloc;
832     Mat_SeqBAIJ *Bloc;
833     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
834     MatScalar   *a;
835 
836     /* Should this be the same type as mat? */
837     if (!rank) {
838       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
839     } else {
840       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
841     }
842     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
843     ierr = MatMPISBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
844     PetscLogObjectParent(mat,A);
845 
846     /* copy over the A part */
847     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
848     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
849     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
850 
851     for (i=0; i<mbs; i++) {
852       rvals[0] = bs*(baij->rstart + i);
853       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
854       for (j=ai[i]; j<ai[i+1]; j++) {
855         col = (baij->cstart+aj[j])*bs;
856         for (k=0; k<bs; k++) {
857           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
858           col++; a += bs;
859         }
860       }
861     }
862     /* copy over the B part */
863     Bloc = (Mat_SeqBAIJ*)baij->B->data;
864     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
865     for (i=0; i<mbs; i++) {
866       rvals[0] = bs*(baij->rstart + i);
867       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
868       for (j=ai[i]; j<ai[i+1]; j++) {
869         col = baij->garray[aj[j]]*bs;
870         for (k=0; k<bs; k++) {
871           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
872           col++; a += bs;
873         }
874       }
875     }
876     ierr = PetscFree(rvals);CHKERRQ(ierr);
877     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
878     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
879     /*
880        Everyone has to call to draw the matrix since the graphics waits are
881        synchronized across all processors that share the PetscDraw object
882     */
883     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
884     if (!rank) {
885       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
886       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
887     }
888     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
889     ierr = MatDestroy(A);CHKERRQ(ierr);
890   }
891   PetscFunctionReturn(0);
892 }
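/*
   Typical ways into the viewer above (a sketch, assuming the standard PETSc viewers
   of this release):

     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   or, to exercise the INFO_DETAIL branch, set the format first:

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/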
893 
894 #undef __FUNCT__
895 #define __FUNCT__ "MatView_MPISBAIJ"
896 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
897 {
898   PetscErrorCode ierr;
899   PetscTruth iascii,isdraw,issocket,isbinary;
900 
901   PetscFunctionBegin;
902   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
903   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
904   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
905   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
906   if (iascii || isdraw || issocket || isbinary) {
907     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
908   } else {
909     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
910   }
911   PetscFunctionReturn(0);
912 }
913 
914 #undef __FUNCT__
915 #define __FUNCT__ "MatDestroy_MPISBAIJ"
916 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
917 {
918   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
919   PetscErrorCode ierr;
920 
921   PetscFunctionBegin;
922 #if defined(PETSC_USE_LOG)
923   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
924 #endif
925   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
926   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
927   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
928   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
929   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
930 #if defined (PETSC_USE_CTABLE)
931   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
932 #else
933   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
934 #endif
935   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
936   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
937   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
938   if (baij->slvec0) {
939     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
940     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
941   }
942   if (baij->slvec1) {
943     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
944     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
945     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
946   }
947   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
948   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
949   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
950   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
951 #if defined(PETSC_USE_MAT_SINGLE)
952   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
953 #endif
954   ierr = PetscFree(baij);CHKERRQ(ierr);
955 
956   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
957   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
958   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
959   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
960   PetscFunctionReturn(0);
961 }
962 
963 #undef __FUNCT__
964 #define __FUNCT__ "MatMult_MPISBAIJ"
965 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
966 {
967   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
968   PetscErrorCode ierr;
969   int nt,mbs=a->mbs,bs=a->bs;
970   PetscScalar *x,*from,zero=0.0;
971 
972   PetscFunctionBegin;
973   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
974   if (nt != A->n) {
975     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
976   }
977   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
978   if (nt != A->m) {
979     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
980   }
981 
982   /* diagonal part */
983   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
984   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
985 
986   /* subdiagonal part */
987   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
988 
989   /* copy x into the vec slvec0 */
990   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
991   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
992   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* vector entries are PetscScalar */
993   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
994 
995   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
996   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
997   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
998 
999   /* superdiagonal part */
1000   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1001 
1002   PetscFunctionReturn(0);
1003 }
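/*
   In outline (notation ours): let x_d be the locally owned part of x and x_g the
   ghost values owned by other processes. The sequence above computes

       y = A*x_d + B*x_g + (B^T*x_d contributions returned to their owners)

   slvec0 carries [x_d ; B^T*x_d] forward through the scatter sMvctx, slvec1 comes
   back as [A*x_d plus the accumulated B^T contributions ; x_g], and the final
   multadd applies B to the ghost part. Each off-diagonal block, stored only once
   in the upper triangle, thus acts both as B and as B^T.
*/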
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1007 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1008 {
1009   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1010   PetscErrorCode ierr;
1011   int nt;
1012 
1013   PetscFunctionBegin;
1014   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1015   if (nt != A->n) {
1016     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1017   }
1018   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1019   if (nt != A->m) {
1020     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1021   }
1022 
1023   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1024   /* do diagonal part */
1025   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1026   /* do superdiagonal part */
1027   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1028   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1029   /* do subdiagonal part */
1030   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1031   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1032   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1033 
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1039 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1040 {
1041   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1042   PetscErrorCode ierr;
1043   int mbs=a->mbs,bs=a->bs;
1044   PetscScalar  *x,*from,zero=0.0;
1045 
1046   PetscFunctionBegin;
1047   /*
1048   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1049   PetscSynchronizedFlush(A->comm);
1050   */
1051   /* diagonal part */
1052   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1053   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1054 
1055   /* subdiagonal part */
1056   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1057 
1058   /* copy x into the vec slvec0 */
1059   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1060   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1061   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* vector entries are PetscScalar */
1062   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1063 
1064   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1065   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1066   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1067 
1068   /* superdiagonal part */
1069   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1070 
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 #undef __FUNCT__
1075 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1076 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1077 {
1078   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1079   PetscErrorCode ierr;
1080 
1081   PetscFunctionBegin;
1082   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1083   /* do diagonal part */
1084   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1085   /* do superdiagonal part */
1086   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1087   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1088 
1089   /* do subdiagonal part */
1090   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1091   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1092   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1093 
1094   PetscFunctionReturn(0);
1095 }
1096 
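/* The matrix is symmetric (A^T = A), so the transpose products below reduce to MatMult() and MatMultAdd(). */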
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1099 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1100 {
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1110 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 /*
1120   This only works correctly for square matrices where the subblock A->A is the
1121    diagonal block
1122 */
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1125 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1126 {
1127   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1128   PetscErrorCode ierr;
1129 
1130   PetscFunctionBegin;
1131   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1132   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatScale_MPISBAIJ"
1138 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1139 {
1140   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1141   PetscErrorCode ierr;
1142 
1143   PetscFunctionBegin;
1144   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1145   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1151 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1152 {
1153   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1154   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1155   PetscErrorCode ierr;
1156   int            bs = mat->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1157   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1158   int            *cmap,*idx_p,cstart = mat->cstart;
1159 
1160   PetscFunctionBegin;
1161   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1162   mat->getrowactive = PETSC_TRUE;
1163 
1164   if (!mat->rowvalues && (idx || v)) {
1165     /*
1166         allocate enough space to hold information from the longest row.
1167     */
1168     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1169     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1170     int     max = 1,mbs = mat->mbs,tmp;
1171     for (i=0; i<mbs; i++) {
1172       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1173       if (max < tmp) { max = tmp; }
1174     }
1175     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1176     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1177   }
1178 
1179   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1180   lrow = row - brstart;  /* local row index */
1181 
1182   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1183   if (!v)   {pvA = 0; pvB = 0;}
1184   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1185   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1186   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1187   nztot = nzA + nzB;
1188 
1189   cmap  = mat->garray;
1190   if (v  || idx) {
1191     if (nztot) {
1192       /* Sort by increasing column numbers, assuming A and B already sorted */
1193       int imark = -1;
1194       if (v) {
1195         *v = v_p = mat->rowvalues;
1196         for (i=0; i<nzB; i++) {
1197           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1198           else break;
1199         }
1200         imark = i;
1201         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1202         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1203       }
1204       if (idx) {
1205         *idx = idx_p = mat->rowindices;
1206         if (imark > -1) {
1207           for (i=0; i<imark; i++) {
1208             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1209           }
1210         } else {
1211           for (i=0; i<nzB; i++) {
1212             if (cmap[cworkB[i]/bs] < cstart)
1213               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1214             else break;
1215           }
1216           imark = i;
1217         }
1218         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1219         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1220       }
1221     } else {
1222       if (idx) *idx = 0;
1223       if (v)   *v   = 0;
1224     }
1225   }
1226   *nz = nztot;
1227   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1228   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
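/*
   A minimal MatGetRow()/MatRestoreRow() loop over the locally owned rows (a sketch;
   the routine above allows only one active row at a time, so every MatGetRow() must
   be paired with a MatRestoreRow()):

     int         rstart,rend,row,nz,*cols;
     PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
       ... use the nz column indices cols[] and values vals[] of this row ...
       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     }
*/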
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1234 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1235 {
1236   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1237 
1238   PetscFunctionBegin;
1239   if (baij->getrowactive == PETSC_FALSE) {
1240     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1241   }
1242   baij->getrowactive = PETSC_FALSE;
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1248 PetscErrorCode MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1249 {
1250   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1251 
1252   PetscFunctionBegin;
1253   *bs = baij->bs;
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1259 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1260 {
1261   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1262   PetscErrorCode ierr;
1263 
1264   PetscFunctionBegin;
1265   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1266   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1272 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1273 {
1274   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1275   Mat         A = a->A,B = a->B;
1276   PetscErrorCode ierr;
1277   PetscReal   isend[5],irecv[5];
1278 
1279   PetscFunctionBegin;
1280   info->block_size     = (PetscReal)a->bs;
1281   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1282   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1283   isend[3] = info->memory;  isend[4] = info->mallocs;
1284   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1285   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1286   isend[3] += info->memory;  isend[4] += info->mallocs;
1287   if (flag == MAT_LOCAL) {
1288     info->nz_used      = isend[0];
1289     info->nz_allocated = isend[1];
1290     info->nz_unneeded  = isend[2];
1291     info->memory       = isend[3];
1292     info->mallocs      = isend[4];
1293   } else if (flag == MAT_GLOBAL_MAX) {
1294     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1295     info->nz_used      = irecv[0];
1296     info->nz_allocated = irecv[1];
1297     info->nz_unneeded  = irecv[2];
1298     info->memory       = irecv[3];
1299     info->mallocs      = irecv[4];
1300   } else if (flag == MAT_GLOBAL_SUM) {
1301     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1302     info->nz_used      = irecv[0];
1303     info->nz_allocated = irecv[1];
1304     info->nz_unneeded  = irecv[2];
1305     info->memory       = irecv[3];
1306     info->mallocs      = irecv[4];
1307   } else {
1308     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",flag);
1309   }
1310   info->rows_global       = (PetscReal)matin->M;
1311   info->columns_global    = (PetscReal)matin->N;
1312   info->rows_local        = (PetscReal)matin->m;
1313   info->columns_local     = (PetscReal)matin->N;
1314   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1315   info->fill_ratio_needed = 0;
1316   info->factor_mallocs    = 0;
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1322 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1323 {
1324   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328   switch (op) {
1329   case MAT_NO_NEW_NONZERO_LOCATIONS:
1330   case MAT_YES_NEW_NONZERO_LOCATIONS:
1331   case MAT_COLUMNS_UNSORTED:
1332   case MAT_COLUMNS_SORTED:
1333   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1334   case MAT_KEEP_ZEROED_ROWS:
1335   case MAT_NEW_NONZERO_LOCATION_ERR:
1336     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1337     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1338     break;
1339   case MAT_ROW_ORIENTED:
1340     a->roworiented = PETSC_TRUE;
1341     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1342     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1343     break;
1344   case MAT_ROWS_SORTED:
1345   case MAT_ROWS_UNSORTED:
1346   case MAT_YES_NEW_DIAGONALS:
1347     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1348     break;
1349   case MAT_COLUMN_ORIENTED:
1350     a->roworiented = PETSC_FALSE;
1351     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1352     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1353     break;
1354   case MAT_IGNORE_OFF_PROC_ENTRIES:
1355     a->donotstash = PETSC_TRUE;
1356     break;
1357   case MAT_NO_NEW_DIAGONALS:
1358     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1359   case MAT_USE_HASH_TABLE:
1360     a->ht_flag = PETSC_TRUE;
1361     break;
1362   case MAT_NOT_SYMMETRIC:
1363   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1364   case MAT_HERMITIAN:
1365     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1366   case MAT_SYMMETRIC:
1367   case MAT_STRUCTURALLY_SYMMETRIC:
1368   case MAT_NOT_HERMITIAN:
1369   case MAT_SYMMETRY_ETERNAL:
1370   case MAT_NOT_SYMMETRY_ETERNAL:
1371     break;
1372   default:
1373     SETERRQ(PETSC_ERR_SUP,"unknown option");
1374   }
1375   PetscFunctionReturn(0);
1376 }
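
/*
   Illustrative sketch (added): storage-related options are forwarded to both the
   diagonal (A) and off-diagonal (B) blocks, while options that would break
   symmetry are rejected above. Enabling hash-table assembly, for instance, is a
   single call; the helper name is hypothetical.
*/
static PetscErrorCode ExampleEnableHashAssembly_MPISBAIJ(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(mat,MAT_USE_HASH_TABLE);CHKERRQ(ierr); /* sets a->ht_flag in the case above */
  PetscFunctionReturn(0);
}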
1377 
1378 #undef __FUNCT__
1379 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1380 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1381 {
1382   PetscErrorCode ierr;
1383   PetscFunctionBegin;
1384   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1385   PetscFunctionReturn(0);
1386 }
1387 
1388 #undef __FUNCT__
1389 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1390 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1391 {
1392   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1393   Mat         a = baij->A,b = baij->B;
1394   PetscErrorCode ierr;
1395   int s1,s2,s3;
1396 
1397   PetscFunctionBegin;
1398   if (ll != rr) {
1399     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1400   }
1401   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1402   if (rr) {
1403     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1404     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1405     /* Overlap communication with computation. */
1406     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1407     /* ll == rr was checked above, so the left scaling is folded into this branch */
1408     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1409     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1410     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1411 
1412     /* scale the diagonal block */
1413     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1414 
1415     /* do the scatter end, then right scale the off-diagonal block */
1416     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1417     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1418   }
1420 
1421   PetscFunctionReturn(0);
1422 }
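
/*
   Usage sketch (added): since only the upper triangle is stored, scaling must
   preserve symmetry, so the routine above computes D*A*D from a single diagonal
   vector passed as both the left and right argument. Helper name hypothetical.
*/
static PetscErrorCode ExampleSymmetricScale_MPISBAIJ(Mat mat,Vec d)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr); /* ll and rr must be the identical Vec */
  PetscFunctionReturn(0);
}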
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1426 PetscErrorCode MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1427 {
1428   PetscFunctionBegin;
1429   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1434 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1435 {
1436   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1437   MPI_Comm    comm = A->comm;
1438   static int  called = 0;
1439   PetscErrorCode ierr;
1440 
1441   PetscFunctionBegin;
1442   if (!a->rank) {
1443     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1444   }
1445   if (called) {PetscFunctionReturn(0);} else called = 1;
1446   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1447   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1453 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1454 {
1455   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1456   PetscErrorCode ierr;
1457 
1458   PetscFunctionBegin;
1459   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1464 
1465 #undef __FUNCT__
1466 #define __FUNCT__ "MatEqual_MPISBAIJ"
1467 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1468 {
1469   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1470   Mat         a,b,c,d;
1471   PetscTruth  flg;
1472   PetscErrorCode ierr;
1473 
1474   PetscFunctionBegin;
1475   a = matA->A; b = matA->B;
1476   c = matB->A; d = matB->B;
1477 
1478   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1479   if (flg == PETSC_TRUE) {
1480     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1481   }
1482   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1488 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1489 {
1490   PetscErrorCode ierr;
1491 
1492   PetscFunctionBegin;
1493   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1494   PetscFunctionReturn(0);
1495 }
1496 
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1499 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1500 {
1501   PetscErrorCode ierr;
1502   int        i;
1503   PetscTruth flg;
1504 
1505   PetscFunctionBegin;
1506   for (i=0; i<n; i++) {
1507     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1508     if (!flg) {
1509       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1510     }
1511   }
1512   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
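
/*
   Sketch (added): only symmetric submatrices can be extracted, so the same index
   set is passed for rows and columns. ISCreateStride() follows the PETSc 2.x
   calling sequence; the helper and its arguments are illustrative only.
*/
static PetscErrorCode ExampleGetPrincipalSubmatrix_MPISBAIJ(Mat A,int first,int n)
{
  IS             is;
  Mat            *submat;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = ISCreateStride(PETSC_COMM_SELF,n,first,1,&is);CHKERRQ(ierr);
  /* identical row and column IS, as enforced by the ISEqual() check above */
  ierr = MatGetSubMatrices(A,1,&is,&is,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
  ierr = ISDestroy(is);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&submat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}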
1515 
1516 
1517 /* -------------------------------------------------------------------*/
1518 static struct _MatOps MatOps_Values = {
1519        MatSetValues_MPISBAIJ,
1520        MatGetRow_MPISBAIJ,
1521        MatRestoreRow_MPISBAIJ,
1522        MatMult_MPISBAIJ,
1523 /* 4*/ MatMultAdd_MPISBAIJ,
1524        MatMultTranspose_MPISBAIJ,
1525        MatMultTransposeAdd_MPISBAIJ,
1526        0,
1527        0,
1528        0,
1529 /*10*/ 0,
1530        0,
1531        0,
1532        MatRelax_MPISBAIJ,
1533        MatTranspose_MPISBAIJ,
1534 /*15*/ MatGetInfo_MPISBAIJ,
1535        MatEqual_MPISBAIJ,
1536        MatGetDiagonal_MPISBAIJ,
1537        MatDiagonalScale_MPISBAIJ,
1538        MatNorm_MPISBAIJ,
1539 /*20*/ MatAssemblyBegin_MPISBAIJ,
1540        MatAssemblyEnd_MPISBAIJ,
1541        0,
1542        MatSetOption_MPISBAIJ,
1543        MatZeroEntries_MPISBAIJ,
1544 /*25*/ MatZeroRows_MPISBAIJ,
1545        0,
1546        0,
1547        0,
1548        0,
1549 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1550        0,
1551        0,
1552        0,
1553        0,
1554 /*35*/ MatDuplicate_MPISBAIJ,
1555        0,
1556        0,
1557        0,
1558        0,
1559 /*40*/ 0,
1560        MatGetSubMatrices_MPISBAIJ,
1561        MatIncreaseOverlap_MPISBAIJ,
1562        MatGetValues_MPISBAIJ,
1563        0,
1564 /*45*/ MatPrintHelp_MPISBAIJ,
1565        MatScale_MPISBAIJ,
1566        0,
1567        0,
1568        0,
1569 /*50*/ MatGetBlockSize_MPISBAIJ,
1570        0,
1571        0,
1572        0,
1573        0,
1574 /*55*/ 0,
1575        0,
1576        MatSetUnfactored_MPISBAIJ,
1577        0,
1578        MatSetValuesBlocked_MPISBAIJ,
1579 /*60*/ 0,
1580        0,
1581        0,
1582        MatGetPetscMaps_Petsc,
1583        0,
1584 /*65*/ 0,
1585        0,
1586        0,
1587        0,
1588        0,
1589 /*70*/ MatGetRowMax_MPISBAIJ,
1590        0,
1591        0,
1592        0,
1593        0,
1594 /*75*/ 0,
1595        0,
1596        0,
1597        0,
1598        0,
1599 /*80*/ 0,
1600        0,
1601        0,
1602        0,
1603        MatLoad_MPISBAIJ,
1604 /*85*/ 0,
1605        0,
1606        0,
1607        0,
1608        0,
1609 /*90*/ 0,
1610        0,
1611        0,
1612        0,
1613        0,
1614 /*95*/ 0,
1615        0,
1616        0,
1617        0};
1618 
1619 
1620 EXTERN_C_BEGIN
1621 #undef __FUNCT__
1622 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1623 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1624 {
1625   PetscFunctionBegin;
1626   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1627   *iscopy = PETSC_FALSE;
1628   PetscFunctionReturn(0);
1629 }
1630 EXTERN_C_END
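
/*
   Sketch (added): the routine above is not called directly; it is composed on the
   matrix under the name "MatGetDiagonalBlock_C" in MatCreate_MPISBAIJ() below and
   is retrieved with PetscObjectQueryFunction(), e.g.:
*/
static PetscErrorCode ExampleGetDiagonalBlock_MPISBAIJ(Mat A,Mat *Ad)
{
  PetscErrorCode ierr,(*f)(Mat,PetscTruth*,MatReuse,Mat*);
  PetscTruth     iscopy;

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,Ad);CHKERRQ(ierr); /* iscopy comes back PETSC_FALSE: no copy made */
  }
  PetscFunctionReturn(0);
}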
1631 
1632 EXTERN_C_BEGIN
1633 #undef __FUNCT__
1634 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1635 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1636 {
1637   Mat_MPISBAIJ *b;
1638   PetscErrorCode ierr;
1639   int i,mbs,Mbs;
1640 
1641   PetscFunctionBegin;
1642   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1643 
1644   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1645   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1646   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1647   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1648   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1649   if (d_nnz) {
1650     for (i=0; i<B->m/bs; i++) {
1651       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1652     }
1653   }
1654   if (o_nnz) {
1655     for (i=0; i<B->m/bs; i++) {
1656       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1657     }
1658   }
1659   B->preallocated = PETSC_TRUE;
1660   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1661   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1662   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1663   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1664 
1665   b   = (Mat_MPISBAIJ*)B->data;
1666   mbs = B->m/bs;
1667   Mbs = B->M/bs;
1668   if (mbs*bs != B->m) {
1669     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by blocksize %d",B->m,bs);
1670   }
1671 
1672   b->bs  = bs;
1673   b->bs2 = bs*bs;
1674   b->mbs = mbs;
1675   b->nbs = mbs;
1676   b->Mbs = Mbs;
1677   b->Nbs = Mbs;
1678 
1679   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1680   b->rowners[0]    = 0;
1681   for (i=2; i<=b->size; i++) {
1682     b->rowners[i] += b->rowners[i-1];
1683   }
1684   b->rstart    = b->rowners[b->rank];
1685   b->rend      = b->rowners[b->rank+1];
1686   b->cstart    = b->rstart;
1687   b->cend      = b->rend;
1688   for (i=0; i<=b->size; i++) {
1689     b->rowners_bs[i] = b->rowners[i]*bs;
1690   }
1691   b->rstart_bs = b->rstart*bs;
1692   b->rend_bs   = b->rend*bs;
1693 
1694   b->cstart_bs = b->cstart*bs;
1695   b->cend_bs   = b->cend*bs;
1696 
1697   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1698   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1699   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1700   PetscLogObjectParent(B,b->A);
1701 
1702   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1703   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1704   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1705   PetscLogObjectParent(B,b->B);
1706 
1707   /* build cache for off array entries formed */
1708   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1709 
1710   PetscFunctionReturn(0);
1711 }
1712 EXTERN_C_END
1713 
1714 /*MC
1715    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1716    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1717 
1718    Options Database Keys:
1719 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1720 
1721   Level: beginner
1722 
1723 .seealso: MatCreateMPISBAIJ
1724 M*/
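
/*
   Usage sketch for the manual page above (added). Sizes are placeholders; the
   MatCreate() calling sequence is the PETSc 2.x one used throughout this file.
*/
static PetscErrorCode ExampleCreateFromOptions_MPISBAIJ(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);  /* or select -mat_type mpisbaij at run time */
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}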
1725 
1726 EXTERN_C_BEGIN
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatCreate_MPISBAIJ"
1729 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1730 {
1731   Mat_MPISBAIJ *b;
1732   PetscErrorCode ierr;
1733   PetscTruth   flg;
1734 
1735   PetscFunctionBegin;
1736 
1737   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1738   B->data = (void*)b;
1739   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1740   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1741 
1742   B->ops->destroy    = MatDestroy_MPISBAIJ;
1743   B->ops->view       = MatView_MPISBAIJ;
1744   B->mapping    = 0;
1745   B->factor     = 0;
1746   B->assembled  = PETSC_FALSE;
1747 
1748   B->insertmode = NOT_SET_VALUES;
1749   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1750   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1751 
1752   /* build local table of row and column ownerships */
1753   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1754   b->cowners    = b->rowners + b->size + 2;
1755   b->rowners_bs = b->cowners + b->size + 2;
1756   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1757 
1758   /* build cache for off array entries formed */
1759   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1760   b->donotstash  = PETSC_FALSE;
1761   b->colmap      = PETSC_NULL;
1762   b->garray      = PETSC_NULL;
1763   b->roworiented = PETSC_TRUE;
1764 
1765 #if defined(PETSC_USE_MAT_SINGLE)
1766   /* stuff for MatSetValues_XXX in single precision */
1767   b->setvalueslen     = 0;
1768   b->setvaluescopy    = PETSC_NULL;
1769 #endif
1770 
1771   /* stuff used in block assembly */
1772   b->barray       = 0;
1773 
1774   /* stuff used for matrix vector multiply */
1775   b->lvec         = 0;
1776   b->Mvctx        = 0;
1777   b->slvec0       = 0;
1778   b->slvec0b      = 0;
1779   b->slvec1       = 0;
1780   b->slvec1a      = 0;
1781   b->slvec1b      = 0;
1782   b->sMvctx       = 0;
1783 
1784   /* stuff for MatGetRow() */
1785   b->rowindices   = 0;
1786   b->rowvalues    = 0;
1787   b->getrowactive = PETSC_FALSE;
1788 
1789   /* hash table stuff */
1790   b->ht           = 0;
1791   b->hd           = 0;
1792   b->ht_size      = 0;
1793   b->ht_flag      = PETSC_FALSE;
1794   b->ht_fact      = 0;
1795   b->ht_total_ct  = 0;
1796   b->ht_insert_ct = 0;
1797 
1798   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1799   if (flg) {
1800     PetscReal fact = 1.39;
1801     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1802     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1803     if (fact <= 1.0) fact = 1.39;
1804     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1805     PetscLogInfo(0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact);
1806   }
1807   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1808                                      "MatStoreValues_MPISBAIJ",
1809                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1810   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1811                                      "MatRetrieveValues_MPISBAIJ",
1812                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1813   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1814                                      "MatGetDiagonalBlock_MPISBAIJ",
1815                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1816   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1817                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1818                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1819   B->symmetric                  = PETSC_TRUE;
1820   B->structurally_symmetric     = PETSC_TRUE;
1821   B->symmetric_set              = PETSC_TRUE;
1822   B->structurally_symmetric_set = PETSC_TRUE;
1823   PetscFunctionReturn(0);
1824 }
1825 EXTERN_C_END
1826 
1827 /*MC
1828    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1829 
1830    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1831    and MATMPISBAIJ otherwise.
1832 
1833    Options Database Keys:
1834 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1835 
1836   Level: beginner
1837 
1838 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1839 M*/
1840 
1841 EXTERN_C_BEGIN
1842 #undef __FUNCT__
1843 #define __FUNCT__ "MatCreate_SBAIJ"
1844 PetscErrorCode MatCreate_SBAIJ(Mat A)
1845 {
1846   PetscErrorCode ierr;
1847   int size;
1848 
1849   PetscFunctionBegin;
1850   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1851   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1852   if (size == 1) {
1853     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1854   } else {
1855     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1856   }
1857   PetscFunctionReturn(0);
1858 }
1859 EXTERN_C_END
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1863 /*@C
1864    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1865    the user should preallocate the matrix storage by setting the parameters
1866    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1867    performance can be increased by more than a factor of 50.
1868 
1869    Collective on Mat
1870 
1871    Input Parameters:
1872 +  A - the matrix
1873 .  bs   - size of block
1874 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1875            submatrix  (same for all local rows)
1876 .  d_nnz - array containing the number of block nonzeros in the various block rows
1877            in the upper triangular and diagonal part of the diagonal portion of the
1878            local submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1879            for the diagonal entry even if it is zero.
1880 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1881            submatrix (same for all local rows).
1882 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1883            off-diagonal portion of the local submatrix (possibly different for
1884            each block row) or PETSC_NULL.
1885 
1886 
1887    Options Database Keys:
1888 +   -mat_no_unroll - uses code that does not unroll the loops in the
1889                      block calculations (much slower)
1890 -   -mat_block_size - size of the blocks to use
1891 
1892    Notes:
1893 
1894    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1895    then it must be used on all processors that share the object for that argument.
1896 
1897    Storage Information:
1898    For a square global matrix we define each processor's diagonal portion
1899    to be its local rows and the corresponding columns (a square submatrix);
1900    each processor's off-diagonal portion encompasses the remainder of the
1901    local matrix (a rectangular submatrix).
1902 
1903    The user can specify preallocated storage for the diagonal part of
1904    the local submatrix with either d_nz or d_nnz (not both).  Set
1905    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1906    memory allocation.  Likewise, specify preallocated storage for the
1907    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1908 
1909    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1910    the figure below we depict these three local rows and all columns (0-11).
1911 
1912 .vb
1913            0 1 2 3 4 5 6 7 8 9 10 11
1914           -------------------
1915    row 3  |  o o o d d d o o o o o o
1916    row 4  |  o o o d d d o o o o o o
1917    row 5  |  o o o d d d o o o o o o
1918           -------------------
1919 .ve
1920 
1921    Thus, any entries in the d locations are stored in the d (diagonal)
1922    submatrix, and any entries in the o locations are stored in the
1923    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1924    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1925 
1926    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1927    plus the diagonal part of the d matrix,
1928    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1929    In general, for PDE problems in which most nonzeros are near the diagonal,
1930    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1931    or you will get TERRIBLE performance; see the users' manual chapter on
1932    matrices.
1933 
1934    Level: intermediate
1935 
1936 .keywords: matrix, block, aij, compressed row, sparse, parallel
1937 
1938 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1939 @*/
1940 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1941 {
1942   PetscErrorCode ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1943 
1944   PetscFunctionBegin;
1945   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1946   if (f) {
1947     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1948   }
1949   PetscFunctionReturn(0);
1950 }
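
/*
   Worked sketch for the storage discussion above (added; counts are illustrative,
   not a general formula). For a block-tridiagonal matrix, each local block row of
   the upper triangle holds its diagonal block plus the block to its right; only
   the last local block row has that neighbour off-process.
*/
static PetscErrorCode ExamplePreallocateTridiagonal_MPISBAIJ(Mat B,int bs,int mbs)
{
  PetscErrorCode ierr;
  int            i,*d_nnz,*o_nnz;

  PetscFunctionBegin;
  ierr  = PetscMalloc(2*mbs*sizeof(int),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + mbs;
  for (i=0; i<mbs; i++) {
    d_nnz[i] = 2; /* diagonal block plus the neighbour to the right */
    o_nnz[i] = 0;
  }
  d_nnz[mbs-1] = 1; /* right neighbour of the last local block row lives off-process */
  o_nnz[mbs-1] = 1;
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}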
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatCreateMPISBAIJ"
1954 /*@C
1955    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1956    (block compressed row).  For good matrix assembly performance
1957    the user should preallocate the matrix storage by setting the parameters
1958    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1959    performance can be increased by more than a factor of 50.
1960 
1961    Collective on MPI_Comm
1962 
1963    Input Parameters:
1964 +  comm - MPI communicator
1965 .  bs   - size of block
1966 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1967            This value should be the same as the local size used in creating the
1968            y vector for the matrix-vector product y = Ax.
1969 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1970            This value should be the same as the local size used in creating the
1971            x vector for the matrix-vector product y = Ax.
1972 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1973 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1974 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1975            submatrix  (same for all local rows)
1976 .  d_nnz - array containing the number of block nonzeros in the various block rows
1977            in the upper triangular and diagonal part of the diagonal portion of the
1978            local submatrix (possibly different for each block row) or PETSC_NULL.
1979            You must leave room for the diagonal entry even if it is zero.
1980 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1981            submatrix (same for all local rows).
1982 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1983            off-diagonal portion of the local submatrix (possibly different for
1984            each block row) or PETSC_NULL.
1985 
1986    Output Parameter:
1987 .  A - the matrix
1988 
1989    Options Database Keys:
1990 +   -mat_no_unroll - uses code that does not unroll the loops in the
1991                      block calculations (much slower)
1992 .   -mat_block_size - size of the blocks to use
1993 -   -mat_mpi - use the parallel matrix data structures even on one processor
1994                (defaults to using SeqSBAIJ format on one processor)
1995 
1996    Notes:
1997    The user MUST specify either the local or global matrix dimensions
1998    (possibly both).
1999 
2000    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2001    then it must be used on all processors that share the object for that argument.
2002 
2003    Storage Information:
2004    For a square global matrix we define each processor's diagonal portion
2005    to be its local rows and the corresponding columns (a square submatrix);
2006    each processor's off-diagonal portion encompasses the remainder of the
2007    local matrix (a rectangular submatrix).
2008 
2009    The user can specify preallocated storage for the diagonal part of
2010    the local submatrix with either d_nz or d_nnz (not both).  Set
2011    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2012    memory allocation.  Likewise, specify preallocated storage for the
2013    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2014 
2015    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2016    the figure below we depict these three local rows and all columns (0-11).
2017 
2018 .vb
2019            0 1 2 3 4 5 6 7 8 9 10 11
2020           -------------------
2021    row 3  |  o o o d d d o o o o o o
2022    row 4  |  o o o d d d o o o o o o
2023    row 5  |  o o o d d d o o o o o o
2024           -------------------
2025 .ve
2026 
2027    Thus, any entries in the d locations are stored in the d (diagonal)
2028    submatrix, and any entries in the o locations are stored in the
2029    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2030    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2031 
2032    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2033    plus the diagonal part of the d matrix,
2034    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2035    In general, for PDE problems in which most nonzeros are near the diagonal,
2036    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2037    or you will get TERRIBLE performance; see the users' manual chapter on
2038    matrices.
2039 
2040    Level: intermediate
2041 
2042 .keywords: matrix, block, aij, compressed row, sparse, parallel
2043 
2044 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2045 @*/
2046 
2047 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2048 {
2049   PetscErrorCode ierr;
2050   int size;
2051 
2052   PetscFunctionBegin;
2053   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2054   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2055   if (size > 1) {
2056     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2057     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2058   } else {
2059     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2060     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2061   }
2062   PetscFunctionReturn(0);
2063 }
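
/*
   Call sketch for the routine above (added): a 100x100 matrix with 2x2 blocks and
   constant per-row estimates; the numbers are placeholders, not recommendations.
*/
static PetscErrorCode ExampleCreateMPISBAIJ(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateMPISBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                           3,PETSC_NULL,1,PETSC_NULL,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}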
2064 
2065 
2066 #undef __FUNCT__
2067 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2068 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2069 {
2070   Mat          mat;
2071   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2072   PetscErrorCode ierr;
2073   int len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs;
2074   PetscScalar  *array;
2075 
2076   PetscFunctionBegin;
2077   *newmat       = 0;
2078   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2079   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2080   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2081 
2082   mat->factor       = matin->factor;
2083   mat->preallocated = PETSC_TRUE;
2084   mat->assembled    = PETSC_TRUE;
2085   mat->insertmode   = NOT_SET_VALUES;
2086 
2087   a = (Mat_MPISBAIJ*)mat->data;
2088   a->bs  = oldmat->bs;
2089   a->bs2 = oldmat->bs2;
2090   a->mbs = oldmat->mbs;
2091   a->nbs = oldmat->nbs;
2092   a->Mbs = oldmat->Mbs;
2093   a->Nbs = oldmat->Nbs;
2094 
2095   a->rstart       = oldmat->rstart;
2096   a->rend         = oldmat->rend;
2097   a->cstart       = oldmat->cstart;
2098   a->cend         = oldmat->cend;
2099   a->size         = oldmat->size;
2100   a->rank         = oldmat->rank;
2101   a->donotstash   = oldmat->donotstash;
2102   a->roworiented  = oldmat->roworiented;
2103   a->rowindices   = 0;
2104   a->rowvalues    = 0;
2105   a->getrowactive = PETSC_FALSE;
2106   a->barray       = 0;
2107   a->rstart_bs    = oldmat->rstart_bs;
2108   a->rend_bs      = oldmat->rend_bs;
2109   a->cstart_bs    = oldmat->cstart_bs;
2110   a->cend_bs      = oldmat->cend_bs;
2111 
2112   /* hash table stuff */
2113   a->ht           = 0;
2114   a->hd           = 0;
2115   a->ht_size      = 0;
2116   a->ht_flag      = oldmat->ht_flag;
2117   a->ht_fact      = oldmat->ht_fact;
2118   a->ht_total_ct  = 0;
2119   a->ht_insert_ct = 0;
2120 
2121   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2122   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2123   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2124   if (oldmat->colmap) {
2125 #if defined (PETSC_USE_CTABLE)
2126     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2127 #else
2128     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2129     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2130     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2131 #endif
2132   } else a->colmap = 0;
2133 
2134   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2135     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2136     PetscLogObjectMemory(mat,len*sizeof(int));
2137     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2138   } else a->garray = 0;
2139 
2140   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2141   PetscLogObjectParent(mat,a->lvec);
2142   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2143   PetscLogObjectParent(mat,a->Mvctx);
2144 
2145   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2146   PetscLogObjectParent(mat,a->slvec0);
2147   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2148   PetscLogObjectParent(mat,a->slvec1);
2149 
2150   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2151   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2152   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2153   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2154   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2155   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2156   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2157   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2160   PetscLogObjectParent(mat,a->slvec0b);
2161   PetscLogObjectParent(mat,a->slvec1a);
2162   PetscLogObjectParent(mat,a->slvec1b);
2163 
2164   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2165   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2166   a->sMvctx = oldmat->sMvctx;
2167   PetscLogObjectParent(mat,a->sMvctx);
2168 
2169   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2170   PetscLogObjectParent(mat,a->A);
2171   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2172   PetscLogObjectParent(mat,a->B);
2173   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2174   *newmat = mat;
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 #include "petscsys.h"
2179 
2180 #undef __FUNCT__
2181 #define __FUNCT__ "MatLoad_MPISBAIJ"
2182 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2183 {
2184   Mat          A;
2185   PetscErrorCode ierr;
2186   int          i,nz,j,rstart,rend,fd;
2187   PetscScalar  *vals,*buf;
2188   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2189   MPI_Status   status;
2190   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2191   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2192   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2193   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2194   int          dcount,kmax,k,nzcount,tmp;
2195 
2196   PetscFunctionBegin;
2197   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2198 
2199   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2200   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2201   if (!rank) {
2202     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2203     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2204     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2205     if (header[3] < 0) {
2206       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2207     }
2208   }
2209 
2210   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2211   M = header[1]; N = header[2];
2212 
2213   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2214 
2215   /*
2216      This code adds extra rows to make sure the number of rows is
2217      divisible by the blocksize
2218   */
2219   Mbs        = M/bs;
2220   extra_rows = bs - M + bs*(Mbs);
2221   if (extra_rows == bs) extra_rows = 0;
2222   else                  Mbs++;
2223   if (extra_rows && !rank) {
2224     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2225   }
2226 
2227   /* determine ownership of all rows */
2228   mbs        = Mbs/size + ((Mbs % size) > rank);
2229   m          = mbs*bs;
2230   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2231   browners   = rowners + size + 1;
2232   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2233   rowners[0] = 0;
2234   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2235   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2236   rstart = rowners[rank];
2237   rend   = rowners[rank+1];
2238 
2239   /* distribute row lengths to all processors */
2240   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2241   if (!rank) {
2242     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2243     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2244     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2245     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2246     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2247     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2248     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2249   } else {
2250     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2251   }
2252 
2253   if (!rank) {   /* procs[0] */
2254     /* calculate the number of nonzeros on each processor */
2255     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2256     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2257     for (i=0; i<size; i++) {
2258       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2259         procsnz[i] += rowlengths[j];
2260       }
2261     }
2262     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2263 
2264     /* determine max buffer needed and allocate it */
2265     maxnz = 0;
2266     for (i=0; i<size; i++) {
2267       maxnz = PetscMax(maxnz,procsnz[i]);
2268     }
2269     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2270 
2271     /* read in my part of the matrix column indices  */
2272     nz     = procsnz[0];
2273     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2274     mycols = ibuf;
2275     if (size == 1)  nz -= extra_rows;
2276     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2277     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2278 
2279     /* read in the parts for all other processors (except the last) and ship them off */
2280     for (i=1; i<size-1; i++) {
2281       nz   = procsnz[i];
2282       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2283       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2284     }
2285     /* read in the stuff for the last proc */
2286     if (size != 1) {
2287       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2288       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2289       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2290       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2291     }
2292     ierr = PetscFree(cols);CHKERRQ(ierr);
2293   } else {  /* procs[i], i>0 */
2294     /* determine buffer space needed for message */
2295     nz = 0;
2296     for (i=0; i<m; i++) {
2297       nz += locrowlens[i];
2298     }
2299     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2300     mycols = ibuf;
2301     /* receive message of column indices*/
2302     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2303     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2304     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2305   }
2306 
2307   /* loop over local rows, determining number of off diagonal entries */
2308   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2309   odlens   = dlens + (rend-rstart);
2310   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2311   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2312   masked1  = mask    + Mbs;
2313   masked2  = masked1 + Mbs;
2314   rowcount = 0; nzcount = 0;
2315   for (i=0; i<mbs; i++) {
2316     dcount  = 0;
2317     odcount = 0;
2318     for (j=0; j<bs; j++) {
2319       kmax = locrowlens[rowcount];
2320       for (k=0; k<kmax; k++) {
2321         tmp = mycols[nzcount++]/bs; /* block col. index */
2322         if (!mask[tmp]) {
2323           mask[tmp] = 1;
2324           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2325           else masked1[dcount++] = tmp; /* entry in diag portion */
2326         }
2327       }
2328       rowcount++;
2329     }
2330 
2331     dlens[i]  = dcount;  /* d_nzz[i] */
2332     odlens[i] = odcount; /* o_nzz[i] */
2333 
2334     /* zero out the mask elements we set */
2335     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2336     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2337   }
2338 
2339   /* create our matrix */
2340   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2341   ierr = MatSetType(A,type);CHKERRQ(ierr);
2342   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2343   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2344 
2345   if (!rank) {
2346     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2347     /* read in my part of the matrix numerical values  */
2348     nz = procsnz[0];
2349     vals = buf;
2350     mycols = ibuf;
2351     if (size == 1)  nz -= extra_rows;
2352     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2353     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2354 
2355     /* insert into matrix */
2356     jj      = rstart*bs;
2357     for (i=0; i<m; i++) {
2358       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2359       mycols += locrowlens[i];
2360       vals   += locrowlens[i];
2361       jj++;
2362     }
2363 
2364     /* read in other processors (except the last one) and ship out */
2365     for (i=1; i<size-1; i++) {
2366       nz   = procsnz[i];
2367       vals = buf;
2368       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2369       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2370     }
2371     /* the last proc */
2372     if (size != 1){
2373       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2374       vals = buf;
2375       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2376       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2377       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2378     }
2379     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2380 
2381   } else {
2382     /* receive numeric values */
2383     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2384 
2385     /* receive message of values*/
2386     vals   = buf;
2387     mycols = ibuf;
2388     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2389     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2390     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2391 
2392     /* insert into matrix */
2393     jj      = rstart*bs;
2394     for (i=0; i<m; i++) {
2395       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2396       mycols += locrowlens[i];
2397       vals   += locrowlens[i];
2398       jj++;
2399     }
2400   }
2401 
2402   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2403   ierr = PetscFree(buf);CHKERRQ(ierr);
2404   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2405   ierr = PetscFree(rowners);CHKERRQ(ierr);
2406   ierr = PetscFree(dlens);CHKERRQ(ierr);
2407   ierr = PetscFree(mask);CHKERRQ(ierr);
2408   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2409   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2410   *newmat = A;
2411   PetscFunctionReturn(0);
2412 }
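
/*
   Usage sketch (added): the viewer must be a binary viewer positioned at a stored
   matrix, and -matload_block_size selects bs. The viewer-open call is an
   assumption of this sketch; the file-mode constant has changed names across
   PETSc releases.
*/
static PetscErrorCode ExampleLoad_MPISBAIJ(MPI_Comm comm,const char file[],Mat *A)
{
  PetscViewer    viewer;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,file,PETSC_FILE_RDONLY,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPISBAIJ,A);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}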
2413 
2414 #undef __FUNCT__
2415 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2416 /*@
2417    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2418 
2419    Input Parameters:
2420 +  mat  - the matrix
2421 -  fact - factor
2422 
2423    Collective on Mat
2424 
2425    Level: advanced
2426 
2427   Notes:
2428    This can also be set by the command line option: -mat_use_hash_table fact
2429 
2430 .keywords: matrix, hashtable, factor, HT
2431 
2432 .seealso: MatSetOption()
2433 @*/
2434 PetscErrorCode MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2435 {
2436   PetscFunctionBegin;
2437   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
2438   /* PetscFunctionReturn(0); */
2439 }
2440 
2441 #undef __FUNCT__
2442 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2443 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2444 {
2445   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2446   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2447   PetscReal    atmp;
2448   PetscReal    *work,*svalues,*rvalues;
2449   PetscErrorCode ierr;
2450   int i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2451   int          rank,size,*rowners_bs,dest,count,source;
2452   PetscScalar  *va;
2453   MatScalar    *ba;
2454   MPI_Status   stat;
2455 
2456   PetscFunctionBegin;
2457   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2458   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2459 
2460   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2461   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2462 
2463   bs   = a->bs;
2464   mbs  = a->mbs;
2465   Mbs  = a->Mbs;
2466   ba   = b->a;
2467   bi   = b->i;
2468   bj   = b->j;
2473 
2474   /* find ownerships */
2475   rowners_bs = a->rowners_bs;
2481 
2482   /* each proc creates an array to be distributed */
2483   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2484   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2485 
2486   /* row_max for B */
2487   if (rank != size-1){
2488     for (i=0; i<mbs; i++) {
2489       ncols = bi[1] - bi[0]; bi++;
2490       brow  = bs*i;
2491       for (j=0; j<ncols; j++){
2492         bcol = bs*(*bj);
2493         for (kcol=0; kcol<bs; kcol++){
2494           col = bcol + kcol;                 /* local col index */
2495           col += rowners_bs[rank+1];      /* global col index */
2497           for (krow=0; krow<bs; krow++){
2498             atmp = PetscAbsScalar(*ba); ba++;
2499             row = brow + krow;    /* local row index */
2501             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2502             if (work[col] < atmp) work[col] = atmp;
2503           }
2504         }
2505         bj++;
2506       }
2507     }
2513 
2514     /* send values to its owners */
2515     for (dest=rank+1; dest<size; dest++){
2516       svalues = work + rowners_bs[dest];
2517       count   = rowners_bs[dest+1]-rowners_bs[dest];
2518       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2523     }
2524   }
2525 
2526   /* receive values */
2527   if (rank){
2528     rvalues = work;
2529     count   = rowners_bs[rank+1]-rowners_bs[rank];
2530     for (source=0; source<rank; source++){
2531       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2532       /* process values */
2533       for (i=0; i<count; i++){
2534         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2535       }
2540     }
2541   }
2542 
2543   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2544   ierr = PetscFree(work);CHKERRQ(ierr);
2545   PetscFunctionReturn(0);
2546 }
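
/*
   Usage sketch (added): v must be distributed like the rows of A. Since only the
   upper triangle is stored, the routine above also forwards each process' column
   maxima to the owning rows before the final comparison.
*/
static PetscErrorCode ExampleRowMax_MPISBAIJ(Mat A,Vec *v)
{
  PetscErrorCode ierr;
  int            m,n,M,N;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = VecCreateMPI(A->comm,m,M,v);CHKERRQ(ierr); /* same row distribution as A */
  ierr = MatGetRowMax(A,*v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}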
2547 
2548 #undef __FUNCT__
2549 #define __FUNCT__ "MatRelax_MPISBAIJ"
2550 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2551 {
2552   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2553   PetscErrorCode ierr;
2554   int mbs=mat->mbs,bs=mat->bs;
2555   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2556   Vec            bb1;
2557 
2558   PetscFunctionBegin;
2559   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2560   if (bs > 1)
2561     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2562 
2563   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2564     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2565       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2566       its--;
2567     }
2568 
2569     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2570     while (its--){
2571 
2572       /* lower triangular part: slvec0b = - B^T*xx */
2573       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2574 
2575       /* copy xx into slvec0a */
2576       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2577       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2578       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2579       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2580 
2581       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2582 
2583       /* copy bb into slvec1a */
2584       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2585       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2586       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2587       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2588 
2589       /* set slvec1b = 0 */
2590       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2591 
2592       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2593       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2594       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2595       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2596 
2597       /* upper triangular part: bb1 = bb1 - B*x */
2598       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2599 
2600       /* local diagonal sweep */
2601       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2602     }
2603     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2604   } else {
2605     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2606   }
2607   PetscFunctionReturn(0);
2608 }
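
/*
   Call sketch (added): only the local symmetric sweep is supported, so a typical
   invocation mirrors what PCSOR would pass down; any other MatSORType reaches the
   error branch above. MatRelax() is assumed to have the same calling sequence as
   this routine.
*/
static PetscErrorCode ExampleLocalSSOR_MPISBAIJ(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* omega = 1.0, no shift, one global and one local iteration */
  ierr = MatRelax(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}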
2609 
2610 #undef __FUNCT__
2611 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2612 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2613 {
2614   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2615   PetscErrorCode ierr;
2616   PetscScalar    mone=-1.0;
2617   Vec            lvec1,bb1;
2618 
2619   PetscFunctionBegin;
2620   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2621   if (mat->bs > 1)
2622     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2623 
2624   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2625     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2626       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2627       its--;
2628     }
2629 
2630     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2631     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2632     while (its--){
2633       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2634 
2635       /* lower diagonal part: bb1 = bb - B^T*xx */
2636       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2637       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2638 
2639       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2640       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2641       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2642 
2643       /* upper diagonal part: bb1 = bb1 - B*x */
2644       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2645       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2646 
2647       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2648 
2649       /* diagonal sweep */
2650       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2651     }
2652     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2653     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2654   } else {
2655     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2656   }
2657   PetscFunctionReturn(0);
2658 }
2659 
2660