/*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/

#include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "src/vec/vecimpl.h"
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

extern int MatSetUpMultiply_MPISBAIJ(Mat);
extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
extern int DisAssemble_MPISBAIJ(Mat);
extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
extern int MatPrintHelp_SeqSBAIJ(Mat);
extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries to single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
int MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
int MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPISBAIJ_Private"
static int CreateColmap_MPISBAIJ_Private(Mat mat)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#define CHUNKSIZE  10
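
/*
   MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) inserts or adds one
   scalar entry into the diagonal block A: it binary-searches the (sorted)
   block row for the block column, and if the block is absent and new
   nonzeros are permitted it grows the row storage by CHUNKSIZE blocks and
   shifts the trailing blocks up one slot before inserting.
*/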

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (int*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->s_maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->s_nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
#ifndef MatSetValues_SeqSBAIJ_B_Private
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (int*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
  int          ierr,i,N = m*n;
  MatScalar    *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
  int          ierr,i,N = m*n*b->bs2;
  MatScalar    *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
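/*
   For example, with bs=2 the scalar entry (3,1) belongs to block
   (3/2,1/2) = (1,0); the block row exceeds the block column, so the entry
   lies in the lower triangle and is skipped rather than stored.
*/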
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar    value;
  PetscTruth   roworiented = baij->roworiented;
  int          ierr,i,j,row,col;
  int          rstart_orig=baij->rstart_bs;
  int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int          cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some variables required in the macro */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa=a->a;

  Mat          B = baij->B;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
  int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar    *ba=b->a;

  int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int          low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar    *ap,*bap;

  /* for stash */
  int          n_loc, *in_loc=0;
  MatScalar    *v_loc=0;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }

  if (!baij->donotstash) {
    ierr = PetscFree(in_loc);CHKERRQ(ierr);
    ierr = PetscFree(v_loc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
  int             rend=baij->rend,cstart=baij->cstart,stepval;
  int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }
448 
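  /* stepval is the number of scalars to skip, after copying one bs-long row
     (or column) of a block, to reach the start of that block's next row in
     the user array v; the full stride between rows is stepval+bs */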
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
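/*
   HASH() is a multiplicative hash: the key is multiplied by the conjugate
   golden ratio HASH_KEY, the integer part is dropped, and the remaining
   fraction is scaled to [0,size). For instance HASH(100,13,tmp) forms
   13*0.6180339887 = 8.034..., keeps the fraction .034..., and returns 3.
*/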
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
  /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
  int        ierr;
  PetscReal  sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      /*
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
      */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      /*
      PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
      PetscSynchronizedFlush(mat->comm); */

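      /* each off-diagonal block is stored only once but occurs twice in the
         symmetric matrix, hence norm(A)^2 = norm(diag part)^2 + 2*norm(B)^2 */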
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table.
  This table is created only once; if new entries need to be added to the
  matrix then the hash table has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
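  /* Note: the reduction below ORs the InsertMode values from all processors;
     the result equals (ADD_VALUES|INSERT_VALUES) exactly when both modes were
     used, which is the mixed case flagged as an error */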
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
  int          i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int          *row,*col,other_disassembled;
  PetscTruth   r1,r2,r3;
  MatScalar    *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      /*
      PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
      PetscSynchronizedFlush(mat->comm);
      */
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    int          M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    if (!rank) {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
int MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
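/*
   Computes y = A*x where only the upper triangular blocks of the matrix are
   stored: the diagonal block product A_d*x is formed locally, B^T*x supplies
   the subdiagonal contribution, and both x and B^T*x travel in the single
   scatter slvec0 -> slvec1, after which B applied to the received ghost
   values adds the superdiagonal part.
*/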
int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,nt,mbs=a->mbs,bs=a->bs;
  PetscScalar  *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," _1comm is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
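/*
   Variant of MatMult that uses the conventional scatter context Mvctx with
   two communication phases: a forward scatter brings in the ghost values of
   x for the superdiagonal product B*lvec, and a reverse add-scatter returns
   the subdiagonal contribution B^T*x to the owning processors.
*/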
int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,mbs=a->mbs,bs=a->bs;
  PetscScalar  *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPISBAIJ"
int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMult().");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
  /* PetscFunctionReturn(0); */
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int            *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    int          max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
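      /* B's columns, mapped to global indices through cmap, that fall before
         the diagonal block come first, then all of A's columns, then the
         remaining B columns; imark marks where the B columns were split */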
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
int MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
  Mat          A = a->A,B = a->B;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->n;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
int MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
    SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1373 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1374 {
1375   int ierr;
1376   PetscFunctionBegin;
1377   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1383 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1384 {
1385   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1386   Mat         a = baij->A,b = baij->B;
1387   int         ierr,s1,s2,s3;
1388 
1389   PetscFunctionBegin;
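  /* Editorial note: only the symmetric scaling diag(l)*A*diag(l) preserves
     symmetry, which is why a single vector must serve as both the left and
     right scaling factor below. */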
1390   if (ll != rr) {
1391     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1392   }
1393   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1394   if (rr) {
1395     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1396     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1397     /* Overlap communication with computation. */
1398     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1399     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1400     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1401     /* left scale the off-diagonal block */
1402     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1403     /* scale the diagonal block */
1404     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1405     /* do a scatter end and then right scale the off-diagonal block */
1406     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1407     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1408   }
1412 
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1418 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1419 {
1420   PetscFunctionBegin;
1421   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1426 int MatPrintHelp_MPISBAIJ(Mat A)
1427 {
1428   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1429   MPI_Comm    comm = A->comm;
1430   static int  called = 0;
1431   int         ierr;
1432 
1433   PetscFunctionBegin;
1434   if (!a->rank) {
1435     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1436   }
1437   if (called) {PetscFunctionReturn(0);} else called = 1;
1438   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1439   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1440   PetscFunctionReturn(0);
1441 }
1442 
1443 #undef __FUNCT__
1444 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1445 int MatSetUnfactored_MPISBAIJ(Mat A)
1446 {
1447   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1448   int         ierr;
1449 
1450   PetscFunctionBegin;
1451   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatEqual_MPISBAIJ"
1459 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1460 {
1461   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1462   Mat         a,b,c,d;
1463   PetscTruth  flg;
1464   int         ierr;
1465 
1466   PetscFunctionBegin;
1467   a = matA->A; b = matA->B;
1468   c = matB->A; d = matB->B;
1469 
1470   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1471   if (flg == PETSC_TRUE) {
1472     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1473   }
1474   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1480 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1481 {
1482   int        ierr;
1483 
1484   PetscFunctionBegin;
1485   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 /* -------------------------------------------------------------------*/
1489 static struct _MatOps MatOps_Values = {
1490        MatSetValues_MPISBAIJ,
1491        MatGetRow_MPISBAIJ,
1492        MatRestoreRow_MPISBAIJ,
1493        MatMult_MPISBAIJ,
1494 /* 4*/ MatMultAdd_MPISBAIJ,
1495        MatMultTranspose_MPISBAIJ,
1496        MatMultTransposeAdd_MPISBAIJ,
1497        0,
1498        0,
1499        0,
1500 /*10*/ 0,
1501        0,
1502        0,
1503        MatRelax_MPISBAIJ,
1504        MatTranspose_MPISBAIJ,
1505 /*15*/ MatGetInfo_MPISBAIJ,
1506        MatEqual_MPISBAIJ,
1507        MatGetDiagonal_MPISBAIJ,
1508        MatDiagonalScale_MPISBAIJ,
1509        MatNorm_MPISBAIJ,
1510 /*20*/ MatAssemblyBegin_MPISBAIJ,
1511        MatAssemblyEnd_MPISBAIJ,
1512        0,
1513        MatSetOption_MPISBAIJ,
1514        MatZeroEntries_MPISBAIJ,
1515 /*25*/ MatZeroRows_MPISBAIJ,
1516        0,
1517        0,
1518        0,
1519        0,
1520 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1521        0,
1522        0,
1523        0,
1524        0,
1525 /*35*/ MatDuplicate_MPISBAIJ,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*40*/ 0,
1531        0,
1532        0,
1533        MatGetValues_MPISBAIJ,
1534        0,
1535 /*45*/ MatPrintHelp_MPISBAIJ,
1536        MatScale_MPISBAIJ,
1537        0,
1538        0,
1539        0,
1540 /*50*/ MatGetBlockSize_MPISBAIJ,
1541        0,
1542        0,
1543        0,
1544        0,
1545 /*55*/ 0,
1546        0,
1547        MatSetUnfactored_MPISBAIJ,
1548        0,
1549        MatSetValuesBlocked_MPISBAIJ,
1550 /*60*/ 0,
1551        0,
1552        0,
1553        MatGetPetscMaps_Petsc,
1554        0,
1555 /*65*/ 0,
1556        0,
1557        0,
1558        0,
1559        0,
1560 /*70*/ MatGetRowMax_MPISBAIJ,
1561        0,
1562        0,
1563        0,
1564        0,
1565 /*75*/ 0,
1566        0,
1567        0,
1568        0,
1569        0,
1570 /*80*/ 0,
1571        0,
1572        0,
1573        0,
1574 /*85*/ MatLoad_MPISBAIJ
1575 };
1576 
1577 
1578 EXTERN_C_BEGIN
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1581 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1582 {
1583   PetscFunctionBegin;
1584   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1585   *iscopy = PETSC_FALSE;
1586   PetscFunctionReturn(0);
1587 }
1588 EXTERN_C_END
1589 
1590 EXTERN_C_BEGIN
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1593 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1594 {
1595   Mat_MPISBAIJ *b;
1596   int          ierr,i,mbs,Mbs;
1597 
1598   PetscFunctionBegin;
1599   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1600 
1601   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1602   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1603   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1604   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1605   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1606   if (d_nnz) {
1607     for (i=0; i<B->m/bs; i++) {
1608       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1609     }
1610   }
1611   if (o_nnz) {
1612     for (i=0; i<B->m/bs; i++) {
1613       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1614     }
1615   }
1616   B->preallocated = PETSC_TRUE;
1617   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1618   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1619   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1620   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1621 
1622   b   = (Mat_MPISBAIJ*)B->data;
1623   mbs = B->m/bs;
1624   Mbs = B->M/bs;
1625   if (mbs*bs != B->m) {
1626     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by blocksize %d",B->m,bs);
1627   }
1628 
1629   b->bs  = bs;
1630   b->bs2 = bs*bs;
1631   b->mbs = mbs;
1632   b->nbs = mbs;
1633   b->Mbs = Mbs;
1634   b->Nbs = Mbs;
1635 
1636   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1637   b->rowners[0]    = 0;
1638   for (i=2; i<=b->size; i++) {
1639     b->rowners[i] += b->rowners[i-1];
1640   }
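  /* Editorial example (hypothetical sizes): with 3 processes owning mbs = 2,
     3 and 2 block rows, the Allgather above fills rowners with {0,2,3,2} and
     the prefix sum turns it into {0,2,5,7}; process k owns block rows
     rowners[k] up to (but not including) rowners[k+1]. */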
1641   b->rstart    = b->rowners[b->rank];
1642   b->rend      = b->rowners[b->rank+1];
1643   b->cstart    = b->rstart;
1644   b->cend      = b->rend;
1645   for (i=0; i<=b->size; i++) {
1646     b->rowners_bs[i] = b->rowners[i]*bs;
1647   }
1648   b->rstart_bs = b->rstart*bs;
1649   b->rend_bs   = b->rend*bs;
1650 
1651   b->cstart_bs = b->cstart*bs;
1652   b->cend_bs   = b->cend*bs;
1653 
1654 
1655   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1656   PetscLogObjectParent(B,b->A);
1657   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1658   PetscLogObjectParent(B,b->B);
1659 
1660   /* build stash used to cache off-processor entries until assembly */
1661   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1662 
1663   PetscFunctionReturn(0);
1664 }
1665 EXTERN_C_END
1666 
1667 /*MC
1668    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1669    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1670 
1671    Options Database Keys:
1672 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1673 
1674   Level: beginner
1675 
1676 .seealso: MatCreateMPISBAIJ
1677 M*/
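/*
   Editorial sketch (not part of the original source): one way to obtain this
   type explicitly; the global size 100 is a placeholder.

.vb
   Mat A;
   ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
.ve

   Alternatively, run with -mat_type mpisbaij and call MatSetFromOptions(A).
*/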
1678 
1679 EXTERN_C_BEGIN
1680 #undef __FUNCT__
1681 #define __FUNCT__ "MatCreate_MPISBAIJ"
1682 int MatCreate_MPISBAIJ(Mat B)
1683 {
1684   Mat_MPISBAIJ *b;
1685   int          ierr;
1686   PetscTruth   flg;
1687 
1688   PetscFunctionBegin;
1689 
1690   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1691   B->data = (void*)b;
1692   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1693   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1694 
1695   B->ops->destroy    = MatDestroy_MPISBAIJ;
1696   B->ops->view       = MatView_MPISBAIJ;
1697   B->mapping    = 0;
1698   B->factor     = 0;
1699   B->assembled  = PETSC_FALSE;
1700 
1701   B->insertmode = NOT_SET_VALUES;
1702   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1703   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1704 
1705   /* build local table of row and column ownerships */
1706   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1707   b->cowners    = b->rowners + b->size + 2;
1708   b->rowners_bs = b->cowners + b->size + 2;
1709   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1710 
1711   /* build stash used to cache off-processor entries until assembly */
1712   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1713   b->donotstash  = PETSC_FALSE;
1714   b->colmap      = PETSC_NULL;
1715   b->garray      = PETSC_NULL;
1716   b->roworiented = PETSC_TRUE;
1717 
1718 #if defined(PETSC_USE_MAT_SINGLE)
1719   /* stuff for MatSetValues_XXX in single precision */
1720   b->setvalueslen     = 0;
1721   b->setvaluescopy    = PETSC_NULL;
1722 #endif
1723 
1724   /* stuff used in block assembly */
1725   b->barray       = 0;
1726 
1727   /* stuff used for matrix vector multiply */
1728   b->lvec         = 0;
1729   b->Mvctx        = 0;
1730   b->slvec0       = 0;
1731   b->slvec0b      = 0;
1732   b->slvec1       = 0;
1733   b->slvec1a      = 0;
1734   b->slvec1b      = 0;
1735   b->sMvctx       = 0;
1736 
1737   /* stuff for MatGetRow() */
1738   b->rowindices   = 0;
1739   b->rowvalues    = 0;
1740   b->getrowactive = PETSC_FALSE;
1741 
1742   /* hash table stuff */
1743   b->ht           = 0;
1744   b->hd           = 0;
1745   b->ht_size      = 0;
1746   b->ht_flag      = PETSC_FALSE;
1747   b->ht_fact      = 0;
1748   b->ht_total_ct  = 0;
1749   b->ht_insert_ct = 0;
1750 
1751   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1752   if (flg) {
1753     PetscReal fact = 1.39;
1754     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1755     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1756     if (fact <= 1.0) fact = 1.39;
1757     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1758     PetscLogInfo(0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact);
1759   }
1760   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1761                                      "MatStoreValues_MPISBAIJ",
1762                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1763   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1764                                      "MatRetrieveValues_MPISBAIJ",
1765                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1766   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1767                                      "MatGetDiagonalBlock_MPISBAIJ",
1768                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1769   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1770                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1771                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1772   PetscFunctionReturn(0);
1773 }
1774 EXTERN_C_END
1775 
1776 /*MC
1777    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1778 
1779    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1780    and MATMPISBAIJ otherwise.
1781 
1782    Options Database Keys:
1783 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1784 
1785   Level: beginner
1786 
1787 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1788 M*/
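/*
   Editorial sketch (not part of the original source): requesting the generic
   type lets MatCreate_SBAIJ() below pick the implementation from the
   communicator size.

.vb
   ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);
.ve

   On one process this yields MATSEQSBAIJ; on more than one, MATMPISBAIJ.
*/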
1789 
1790 EXTERN_C_BEGIN
1791 #undef __FUNCT__
1792 #define __FUNCT__ "MatCreate_SBAIJ"
1793 int MatCreate_SBAIJ(Mat A) {
1794   int ierr,size;
1795 
1796   PetscFunctionBegin;
1797   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1798   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1799   if (size == 1) {
1800     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1801   } else {
1802     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1803   }
1804   PetscFunctionReturn(0);
1805 }
1806 EXTERN_C_END
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1810 /*@C
1811    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1812    the user should preallocate the matrix storage by setting the parameters
1813    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1814    performance can be increased by more than a factor of 50.
1815 
1816    Collective on Mat
1817 
1818    Input Parameters:
1819 +  A - the matrix
1820 .  bs   - size of block
1821 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1822            submatrix  (same for all local rows)
1823 .  d_nnz - array containing the number of block nonzeros in the various block rows
1824            in the upper triangular and diagonal part of the diagonal portion of the local
1825            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1826            for the diagonal entry even if it is zero.
1827 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1828            submatrix (same for all local rows).
1829 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1830            off-diagonal portion of the local submatrix (possibly different for
1831            each block row) or PETSC_NULL.
1832 
1833 
1834    Options Database Keys:
1835 +   -mat_no_unroll - uses code that does not unroll the loops in the
1836                      block calculations (much slower)
1837 -   -mat_block_size - size of the blocks to use
1838 
1839    Notes:
1840 
1841    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1842    then it must be used on all processors that share the object for that argument.
1843 
1844    Storage Information:
1845    For a square global matrix we define each processor's diagonal portion
1846    to be its local rows and the corresponding columns (a square submatrix);
1847    each processor's off-diagonal portion encompasses the remainder of the
1848    local matrix (a rectangular submatrix).
1849 
1850    The user can specify preallocated storage for the diagonal part of
1851    the local submatrix with either d_nz or d_nnz (not both).  Set
1852    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1853    memory allocation.  Likewise, specify preallocated storage for the
1854    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1855 
1856    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1857    the figure below we depict these three local rows and all columns (0-11).
1858 
1859 .vb
1860            0 1 2 3 4 5 6 7 8 9 10 11
1861           -------------------
1862    row 3  |  o o o d d d o o o o o o
1863    row 4  |  o o o d d d o o o o o o
1864    row 5  |  o o o d d d o o o o o o
1865           -------------------
1866 .ve
1867 
1868    Thus, any entries in the d locations are stored in the d (diagonal)
1869    submatrix, and any entries in the o locations are stored in the
1870    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1871    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1872 
1873    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1874    plus the diagonal part of the d matrix,
1875    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1876    In general, for PDE problems in which most nonzeros are near the diagonal,
1877    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1878    or you will get TERRIBLE performance; see the users' manual chapter on
1879    matrices.
1880 
1881    Level: intermediate
1882 
1883 .keywords: matrix, block, aij, compressed row, sparse, parallel
1884 
1885 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1886 @*/
1887 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1888 {
1889   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1890 
1891   PetscFunctionBegin;
1892   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1893   if (f) {
1894     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1895   }
1896   PetscFunctionReturn(0);
1897 }
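/*
   Editorial sketch (not part of the original source): preallocating a
   tridiagonal matrix with bs=1.  Since only the upper triangle is counted,
   each local row needs room for the diagonal plus at most one entry to its
   right; n is a placeholder for the global size.

.vb
   ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,n,n,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,1,2,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve
*/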
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatCreateMPISBAIJ"
1901 /*@C
1902    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1903    (block compressed row).  For good matrix assembly performance
1904    the user should preallocate the matrix storage by setting the parameters
1905    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1906    performance can be increased by more than a factor of 50.
1907 
1908    Collective on MPI_Comm
1909 
1910    Input Parameters:
1911 +  comm - MPI communicator
1912 .  bs   - size of block
1913 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1914            This value should be the same as the local size used in creating the
1915            y vector for the matrix-vector product y = Ax.
1916 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1917            This value should be the same as the local size used in creating the
1918            x vector for the matrix-vector product y = Ax.
1919 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1920 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1921 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1922            submatrix  (same for all local rows)
1923 .  d_nnz - array containing the number of block nonzeros in the various block rows
1924            in the upper triangular portion of the diagonal part of the local
1925            submatrix (possibly different for each block row) or PETSC_NULL.
1926            You must leave room for the diagonal entry even if it is zero.
1927 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1928            submatrix (same for all local rows).
1929 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1930            off-diagonal portion of the local submatrix (possibly different for
1931            each block row) or PETSC_NULL.
1932 
1933    Output Parameter:
1934 .  A - the matrix
1935 
1936    Options Database Keys:
1937 +   -mat_no_unroll - uses code that does not unroll the loops in the
1938                      block calculations (much slower)
1939 .   -mat_block_size - size of the blocks to use
1940 -   -mat_mpi - use the parallel matrix data structures even on one processor
1941                (defaults to using SeqSBAIJ format on one processor)
1942 
1943    Notes:
1944    The user MUST specify either the local or global matrix dimensions
1945    (possibly both).
1946 
1947    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1948    then it must be used on all processors that share the object for that argument.
1949 
1950    Storage Information:
1951    For a square global matrix we define each processor's diagonal portion
1952    to be its local rows and the corresponding columns (a square submatrix);
1953    each processor's off-diagonal portion encompasses the remainder of the
1954    local matrix (a rectangular submatrix).
1955 
1956    The user can specify preallocated storage for the diagonal part of
1957    the local submatrix with either d_nz or d_nnz (not both).  Set
1958    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1959    memory allocation.  Likewise, specify preallocated storage for the
1960    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1961 
1962    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1963    the figure below we depict these three local rows and all columns (0-11).
1964 
1965 .vb
1966            0 1 2 3 4 5 6 7 8 9 10 11
1967           -------------------
1968    row 3  |  o o o d d d o o o o o o
1969    row 4  |  o o o d d d o o o o o o
1970    row 5  |  o o o d d d o o o o o o
1971           -------------------
1972 .ve
1973 
1974    Thus, any entries in the d locations are stored in the d (diagonal)
1975    submatrix, and any entries in the o locations are stored in the
1976    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1977    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1978 
1979    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1980    plus the diagonal part of the d matrix,
1981    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1982    In general, for PDE problems in which most nonzeros are near the diagonal,
1983    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1984    or you will get TERRIBLE performance; see the users' manual chapter on
1985    matrices.
1986 
1987    Level: intermediate
1988 
1989 .keywords: matrix, block, aij, compressed row, sparse, parallel
1990 
1991 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1992 @*/
1993 
1994 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
1995 {
1996   int ierr,size;
1997 
1998   PetscFunctionBegin;
1999   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2000   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2001   if (size > 1) {
2002     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2003     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2004   } else {
2005     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2006     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2007   }
2008   PetscFunctionReturn(0);
2009 }
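/*
   Editorial sketch (not part of the original source): direct creation with
   block size 2 and a placeholder global size of 100, letting PETSc pick the
   row distribution.

.vb
   Mat A;
   ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                            3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
*/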
2010 
2011 
2012 #undef __FUNCT__
2013 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2014 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2015 {
2016   Mat          mat;
2017   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2018   int          ierr,len=0;
2019 
2020   PetscFunctionBegin;
2021   *newmat       = 0;
2022   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2023   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
2024   mat->preallocated = PETSC_TRUE;
2025   a = (Mat_MPISBAIJ*)mat->data;
2026   a->bs  = oldmat->bs;
2027   a->bs2 = oldmat->bs2;
2028   a->mbs = oldmat->mbs;
2029   a->nbs = oldmat->nbs;
2030   a->Mbs = oldmat->Mbs;
2031   a->Nbs = oldmat->Nbs;
2032 
2033   a->rstart       = oldmat->rstart;
2034   a->rend         = oldmat->rend;
2035   a->cstart       = oldmat->cstart;
2036   a->cend         = oldmat->cend;
2037   a->size         = oldmat->size;
2038   a->rank         = oldmat->rank;
2039   a->donotstash   = oldmat->donotstash;
2040   a->roworiented  = oldmat->roworiented;
2041   a->rowindices   = 0;
2042   a->rowvalues    = 0;
2043   a->getrowactive = PETSC_FALSE;
2044   a->barray       = 0;
2045   a->rstart_bs    = oldmat->rstart_bs;
2046   a->rend_bs      = oldmat->rend_bs;
2047   a->cstart_bs    = oldmat->cstart_bs;
2048   a->cend_bs      = oldmat->cend_bs;
2049 
2050   /* hash table stuff */
2051   a->ht           = 0;
2052   a->hd           = 0;
2053   a->ht_size      = 0;
2054   a->ht_flag      = oldmat->ht_flag;
2055   a->ht_fact      = oldmat->ht_fact;
2056   a->ht_total_ct  = 0;
2057   a->ht_insert_ct = 0;
2058 
2059   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
2060   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2061   a->cowners    = a->rowners + a->size + 2;
2062   a->rowners_bs = a->cowners + a->size + 2;
2063   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2064   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2065   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2066   if (oldmat->colmap) {
2067 #if defined (PETSC_USE_CTABLE)
2068     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2069 #else
2070     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2071     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2072     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2073 #endif
2074   } else a->colmap = 0;
2075   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2076     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2077     PetscLogObjectMemory(mat,len*sizeof(int));
2078     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2079   } else a->garray = 0;
2080 
2081   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2082   PetscLogObjectParent(mat,a->lvec);
2083   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2084 
2085   PetscLogObjectParent(mat,a->Mvctx);
2086   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2087   PetscLogObjectParent(mat,a->A);
2088   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2089   PetscLogObjectParent(mat,a->B);
2090   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2091   *newmat = mat;
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 #include "petscsys.h"
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatLoad_MPISBAIJ"
2099 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2100 {
2101   Mat          A;
2102   int          i,nz,ierr,j,rstart,rend,fd;
2103   PetscScalar  *vals,*buf;
2104   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2105   MPI_Status   status;
2106   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2107   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2108   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2109   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2110   int          dcount,kmax,k,nzcount,tmp;
2111 
2112   PetscFunctionBegin;
2113   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2114 
2115   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2116   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2117   if (!rank) {
2118     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2119     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2120     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2121     if (header[3] < 0) {
2122       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2123     }
2124   }
2125 
2126   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2127   M = header[1]; N = header[2];
2128 
2129   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2130 
2131   /*
2132      This code adds extra rows to make sure the number of rows is
2133      divisible by the blocksize
2134   */
2135   Mbs        = M/bs;
2136   extra_rows = bs - M + bs*(Mbs);
2137   if (extra_rows == bs) extra_rows = 0;
2138   else                  Mbs++;
2139   if (extra_rows && !rank) {
2140     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2141   }
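  /* Editorial example (hypothetical numbers): for M = 10 and bs = 3, Mbs
     starts at 10/3 = 3 and extra_rows = 3 - 10 + 9 = 2, so Mbs is bumped to 4
     and two padding rows (unit diagonal) bring the matrix up to 12 rows. */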
2142 
2143   /* determine ownership of all rows */
2144   mbs        = Mbs/size + ((Mbs % size) > rank);
2145   m          = mbs*bs;
2146   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2147   browners   = rowners + size + 1;
2148   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2149   rowners[0] = 0;
2150   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2151   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2152   rstart = rowners[rank];
2153   rend   = rowners[rank+1];
2154 
2155   /* distribute row lengths to all processors */
2156   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2157   if (!rank) {
2158     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2159     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2160     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2161     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2162     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2163     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2164     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2165   } else {
2166     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2167   }
2168 
2169   if (!rank) {   /* procs[0] */
2170     /* calculate the number of nonzeros on each processor */
2171     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2172     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2173     for (i=0; i<size; i++) {
2174       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2175         procsnz[i] += rowlengths[j];
2176       }
2177     }
2178     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2179 
2180     /* determine max buffer needed and allocate it */
2181     maxnz = 0;
2182     for (i=0; i<size; i++) {
2183       maxnz = PetscMax(maxnz,procsnz[i]);
2184     }
2185     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2186 
2187     /* read in my part of the matrix column indices  */
2188     nz     = procsnz[0];
2189     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2190     mycols = ibuf;
2191     if (size == 1)  nz -= extra_rows;
2192     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2193     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2194 
2195     /* read in everyone else's portion (except the last process's) and ship it off */
2196     for (i=1; i<size-1; i++) {
2197       nz   = procsnz[i];
2198       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2199       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2200     }
2201     /* read in the stuff for the last proc */
2202     if (size != 1) {
2203       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2204       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2205       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2206       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2207     }
2208     ierr = PetscFree(cols);CHKERRQ(ierr);
2209   } else {  /* procs[i], i>0 */
2210     /* determine buffer space needed for message */
2211     nz = 0;
2212     for (i=0; i<m; i++) {
2213       nz += locrowlens[i];
2214     }
2215     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2216     mycols = ibuf;
2217     /* receive message of column indices */
2218     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2219     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2220     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2221   }
2222 
2223   /* loop over local rows, determining number of off diagonal entries */
2224   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2225   odlens   = dlens + (rend-rstart);
2226   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2227   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2228   masked1  = mask    + Mbs;
2229   masked2  = masked1 + Mbs;
2230   rowcount = 0; nzcount = 0;
2231   for (i=0; i<mbs; i++) {
2232     dcount  = 0;
2233     odcount = 0;
2234     for (j=0; j<bs; j++) {
2235       kmax = locrowlens[rowcount];
2236       for (k=0; k<kmax; k++) {
2237         tmp = mycols[nzcount++]/bs; /* block col. index */
2238         if (!mask[tmp]) {
2239           mask[tmp] = 1;
2240           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2241           else masked1[dcount++] = tmp; /* entry in diag portion */
2242         }
2243       }
2244       rowcount++;
2245     }
2246 
2247     dlens[i]  = dcount;  /* d_nnz[i] */
2248     odlens[i] = odcount; /* o_nnz[i] */
2249 
2250     /* zero out the mask elements we set */
2251     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2252     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2253   }
2254 
2255   /* create our matrix */
2256   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2257   ierr = MatSetType(A,type);CHKERRQ(ierr);
2258   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2259   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2260 
2261   if (!rank) {
2262     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2263     /* read in my part of the matrix numerical values  */
2264     nz = procsnz[0];
2265     vals = buf;
2266     mycols = ibuf;
2267     if (size == 1)  nz -= extra_rows;
2268     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2269     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2270 
2271     /* insert into matrix */
2272     jj      = rstart*bs;
2273     for (i=0; i<m; i++) {
2274       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2275       mycols += locrowlens[i];
2276       vals   += locrowlens[i];
2277       jj++;
2278     }
2279 
2280     /* read in other processors (except the last one) and ship out */
2281     for (i=1; i<size-1; i++) {
2282       nz   = procsnz[i];
2283       vals = buf;
2284       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2285       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2286     }
2287     /* the last proc */
2288     if (size != 1) {
2289       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2290       vals = buf;
2291       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2292       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2293       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2294     }
2295     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2296 
2297   } else {
2298     /* receive numeric values */
2299     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2300 
2301     /* receive message of values */
2302     vals   = buf;
2303     mycols = ibuf;
2304     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2305     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2306     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2307 
2308     /* insert into matrix */
2309     jj      = rstart*bs;
2310     for (i=0; i<m; i++) {
2311       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2312       mycols += locrowlens[i];
2313       vals   += locrowlens[i];
2314       jj++;
2315     }
2316   }
2317 
2318   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2319   ierr = PetscFree(buf);CHKERRQ(ierr);
2320   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2321   ierr = PetscFree(rowners);CHKERRQ(ierr);
2322   ierr = PetscFree(dlens);CHKERRQ(ierr);
2323   ierr = PetscFree(mask);CHKERRQ(ierr);
2324   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2325   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2326   *newmat = A;
2327   PetscFunctionReturn(0);
2328 }
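/*
   Editorial sketch (not part of the original source): a typical driver for
   this loader.  The file name is a placeholder, and the name of the
   binary-open mode constant has varied across PETSc versions.

.vb
   PetscViewer viewer;
   Mat         A;
   ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
   ierr = MatLoad(viewer,MATMPISBAIJ,&A);CHKERRQ(ierr);
   ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
.ve
*/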
2329 
2330 #undef __FUNCT__
2331 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2332 /*@
2333    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2334 
2335    Input Parameters:
2336 .  mat  - the matrix
2337 .  fact - factor
2338 
2339    Collective on Mat
2340 
2341    Level: advanced
2342 
2343   Notes:
2344    This can also be set by the command line option: -mat_use_hash_table fact
2345 
2346 .keywords: matrix, hashtable, factor, HT
2347 
2348 .seealso: MatSetOption()
2349 @*/
2350 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2351 {
2352   PetscFunctionBegin;
2353   SETERRQ(1,"Function not yet written for SBAIJ format");
2354   /* PetscFunctionReturn(0); */
2355 }
2356 
2357 #undef __FUNCT__
2358 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2359 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2360 {
2361   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2362   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2363   PetscReal    atmp;
2364   PetscReal    *work,*svalues,*rvalues;
2365   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2366   int          rank,size,*rowners_bs,dest,count,source;
2367   PetscScalar  *va;
2368   MatScalar    *ba;
2369   MPI_Status   stat;
2370 
2371   PetscFunctionBegin;
2372   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2373   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2374 
2375   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2376   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2377 
2378   bs   = a->bs;
2379   mbs  = a->mbs;
2380   Mbs  = a->Mbs;
2381   ba   = b->a;
2382   bi   = b->i;
2383   bj   = b->j;
2388 
2389   /* find ownerships */
2390   rowners_bs = a->rowners_bs;
2396 
2397   /* each proc creates an array to be distributed */
2398   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2399   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2400 
2401   /* row_max for B */
2402   if (rank != size-1){
2403     for (i=0; i<mbs; i++) {
2404       ncols = bi[1] - bi[0]; bi++;
2405       brow  = bs*i;
2406       for (j=0; j<ncols; j++){
2407         bcol = bs*(*bj);
2408         for (kcol=0; kcol<bs; kcol++){
2409           col = bcol + kcol;                 /* local col index */
2410           col += rowners_bs[rank+1];      /* global col index */
2412           for (krow=0; krow<bs; krow++){
2413             atmp = PetscAbsScalar(*ba); ba++;
2414             row = brow + krow;    /* local row index */
2416             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2417             if (work[col] < atmp) work[col] = atmp;
2418           }
2419         }
2420         bj++;
2421       }
2422     }
2428 
2429     /* send values to its owners */
2430     for (dest=rank+1; dest<size; dest++){
2431       svalues = work + rowners_bs[dest];
2432       count   = rowners_bs[dest+1]-rowners_bs[dest];
2433       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2438     }
2439   }
2440 
2441   /* receive values */
2442   if (rank){
2443     rvalues = work;
2444     count   = rowners_bs[rank+1]-rowners_bs[rank];
2445     for (source=0; source<rank; source++){
2446       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2447       /* process values */
2448       for (i=0; i<count; i++){
2449         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2450       }
2455     }
2456   }
2457 
2458   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2459   ierr = PetscFree(work);CHKERRQ(ierr);
2460   PetscFunctionReturn(0);
2461 }
2462 
2463 #undef __FUNCT__
2464 #define __FUNCT__ "MatRelax_MPISBAIJ"
2465 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2466 {
2467   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2468   int            ierr,mbs=mat->mbs,bs=mat->bs;
2469   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2470   Vec            bb1;
2471 
2472   PetscFunctionBegin;
2473   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2474   if (bs > 1)
2475     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2476 
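  /* Editorial note: writing the symmetric matrix as A = D + B + B^T, with D
     the local diagonal block (upper triangle stored in mat->A) and B the
     off-process block, each outer iteration below builds
         bb1 = bb - B^T*xx - B*xx
     with a single forward scatter: slvec0 = -[xx; B^T*xx] is accumulated into
     slvec1 = [bb; 0], leaving slvec1a = bb - B^T*xx and -xx in the ghost part
     slvec1b; MatMultAdd with B then completes bb1, to which a local symmetric
     sweep with D is applied. */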
2477   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2478     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2479       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2480       its--;
2481     }
2482 
2483     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2484     while (its--){
2485 
2486       /* lower triangular part: slvec0b = - B^T*xx */
2487       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2488 
2489       /* copy xx into slvec0a */
2490       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2491       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2492       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2493       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2494 
2495       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2496 
2497       /* copy bb into slvec1a */
2498       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2499       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2500       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2501       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2502 
2503       /* set slvec1b = 0 */
2504       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2505 
2506       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2507       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2508       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2509       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2510 
2511       /* upper triangular part: bb1 = slvec1a + B*slvec1b = bb - B^T*xx - B*xx */
2512       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2513 
2514       /* local diagonal sweep */
2515       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2516     }
2517     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2518   } else {
2519     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2520   }
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2526 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2527 {
2528   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2529   int            ierr;
2530   PetscScalar    mone=-1.0;
2531   Vec            lvec1,bb1;
2532 
2533   PetscFunctionBegin;
2534   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2535   if (mat->bs > 1)
2536     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2537 
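  /* Editorial note: this variant reaches the same bb1 = bb - B^T*xx - B*xx as
     MatRelax_MPISBAIJ above, but with the plain BAIJ scatter context Mvctx:
     a forward scatter gathers the ghost values of xx for the B*xx term while
     a reverse ADD scatter returns the -B^T*xx contributions to their owning
     processes. */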
2538   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2539     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2540       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2541       its--;
2542     }
2543 
2544     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2545     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2546     while (its--){
2547       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2548 
2549       /* lower triangular part: bb1 = bb - B^T*xx */
2550       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2551       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2552 
2553       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2554       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2555       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2556 
2557       /* upper triangular part: bb1 = bb1 - B*xx */
2558       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2559       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2560 
2561       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2562 
2563       /* diagonal sweep */
2564       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2565     }
2566     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2567     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2568   } else {
2569     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2570   }
2571   PetscFunctionReturn(0);
2572 }
2573 
2574