xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision b6b3a6b24117a697d33d4b93e2ced21238e64e70)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 extern int DisAssemble_MPISBAIJ(Mat);
11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
13 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
18 extern int MatPrintHelp_SeqSBAIJ(Mat);
19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
28    converts the entries into single precision and then calls ..._MatScalar() to put them
29    into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 int MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
51   int          ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 int MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
66   int          ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
103       if (nrow >= rmax) { \
104         /* there is no extra room in row, therefore enlarge */ \
105         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
106         MatScalar *new_a; \
107  \
108         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
109  \
110         /* malloc new storage space */ \
111         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
112         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
113         new_j = (int*)(new_a + bs2*new_nz); \
114         new_i = new_j + new_nz; \
115  \
116         /* copy over old data into new slots */ \
117         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
118         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
119         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
120         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
121         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
122         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
124         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
125                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
126         /* free up old matrix storage */ \
127         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
128         if (!a->singlemalloc) { \
129           ierr = PetscFree(a->i);CHKERRQ(ierr); \
130           ierr = PetscFree(a->j);CHKERRQ(ierr);\
131         } \
132         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
133         a->singlemalloc = PETSC_TRUE; \
134  \
135         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
136         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
137         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
138         a->maxnz += bs2*CHUNKSIZE; \
139         a->reallocs++; \
140         a->nz++; \
141       } \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       a_noinsert:; \
152     ailen[brow] = nrow; \
153 }
154 #ifndef MatSetValues_SeqBAIJ_B_Private
155 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
156 { \
157     brow = row/bs;  \
158     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
159     rmax = bimax[brow]; nrow = bilen[brow]; \
160       bcol = col/bs; \
161       ridx = row % bs; cidx = col % bs; \
162       low = 0; high = nrow; \
163       while (high-low > 3) { \
164         t = (low+high)/2; \
165         if (rp[t] > bcol) high = t; \
166         else              low  = t; \
167       } \
168       for (_i=low; _i<high; _i++) { \
169         if (rp[_i] > bcol) break; \
170         if (rp[_i] == bcol) { \
171           bap  = ap +  bs2*_i + bs*cidx + ridx; \
172           if (addv == ADD_VALUES) *bap += value;  \
173           else                    *bap  = value;  \
174           goto b_noinsert; \
175         } \
176       } \
177       if (b->nonew == 1) goto b_noinsert; \
178       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
179       if (nrow >= rmax) { \
180         /* there is no extra room in row, therefore enlarge */ \
181         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
182         MatScalar *new_a; \
183  \
184         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
185  \
186         /* malloc new storage space */ \
187         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
188         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
189         new_j = (int*)(new_a + bs2*new_nz); \
190         new_i = new_j + new_nz; \
191  \
192         /* copy over old data into new slots */ \
193         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
194         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
195         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
196         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
197         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
198         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
200         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
201                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
202         /* free up old matrix storage */ \
203         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
204         if (!b->singlemalloc) { \
205           ierr = PetscFree(b->i);CHKERRQ(ierr); \
206           ierr = PetscFree(b->j);CHKERRQ(ierr); \
207         } \
208         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
209         b->singlemalloc = PETSC_TRUE; \
210  \
211         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
212         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
213         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
214         b->maxnz += bs2*CHUNKSIZE; \
215         b->reallocs++; \
216         b->nz++; \
217       } \
218       N = nrow++ - 1;  \
219       /* shift up all the later entries in this row */ \
220       for (ii=N; ii>=_i; ii--) { \
221         rp[ii+1] = rp[ii]; \
222         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
223       } \
224       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
225       rp[_i]                      = bcol;  \
226       ap[bs2*_i + bs*cidx + ridx] = value;  \
227       b_noinsert:; \
228     bilen[brow] = nrow; \
229 }
230 #endif
231 
232 #if defined(PETSC_USE_MAT_SINGLE)
233 #undef __FUNCT__
234 #define __FUNCT__ "MatSetValues_MPISBAIJ"
235 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
236 {
237   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
238   int          ierr,i,N = m*n;
239   MatScalar    *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
261   int         ierr,i,N = m*n*b->bs2;
262   MatScalar   *vsingle;
263 
264   PetscFunctionBegin;
265   if (N > b->setvalueslen) {
266     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
267     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
268     b->setvalueslen  = N;
269   }
270   vsingle = b->setvaluescopy;
271   for (i=0; i<N; i++) {
272     vsingle[i] = v[i];
273   }
274   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
280 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
281 {
282   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
283   int         ierr,i,N = m*n;
284   MatScalar   *vsingle;
285 
286   PetscFunctionBegin;
287   SETERRQ(1,"Function not yet written for SBAIJ format");
288   /* PetscFunctionReturn(0); */
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
293 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
294 {
295   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
296   int         ierr,i,N = m*n*b->bs2;
297   MatScalar   *vsingle;
298 
299   PetscFunctionBegin;
300   SETERRQ(1,"Function not yet written for SBAIJ format");
301   /* PetscFunctionReturn(0); */
302 }
303 #endif
304 
305 /* Only add/insert a(i,j) with i<=j (blocks).
306    Any a(i,j) with i>j input by user is ingored.
307 */
308 #undef __FUNCT__
309 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
310 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
311 {
312   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
313   MatScalar    value;
314   PetscTruth   roworiented = baij->roworiented;
315   int          ierr,i,j,row,col;
316   int          rstart_orig=baij->rstart_bs;
317   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
318   int          cend_orig=baij->cend_bs,bs=baij->bs;
319 
320   /* Some Variables required in the macro */
321   Mat          A = baij->A;
322   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
323   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
324   MatScalar    *aa=a->a;
325 
326   Mat          B = baij->B;
327   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
328   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
329   MatScalar    *ba=b->a;
330 
331   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
332   int          low,high,t,ridx,cidx,bs2=a->bs2;
333   MatScalar    *ap,*bap;
334 
335   /* for stash */
336   int          n_loc, *in_loc=0;
337   MatScalar    *v_loc=0;
338 
339   PetscFunctionBegin;
340 
341   if(!baij->donotstash){
342     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
343     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
344   }
345 
346   for (i=0; i<m; i++) {
347     if (im[i] < 0) continue;
348 #if defined(PETSC_USE_BOPT_g)
349     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
350 #endif
351     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
352       row = im[i] - rstart_orig;              /* local row index */
353       for (j=0; j<n; j++) {
354         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
355         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
356           col = in[j] - cstart_orig;          /* local col index */
357           brow = row/bs; bcol = col/bs;
358           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
359           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
360           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
361           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
362         } else if (in[j] < 0) continue;
363 #if defined(PETSC_USE_BOPT_g)
364         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
365 #endif
366         else {  /* off-diag entry (B) */
367           if (mat->was_assembled) {
368             if (!baij->colmap) {
369               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
370             }
371 #if defined (PETSC_USE_CTABLE)
372             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
373             col  = col - 1;
374 #else
375             col = baij->colmap[in[j]/bs] - 1;
376 #endif
377             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
378               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
379               col =  in[j];
380               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
381               B = baij->B;
382               b = (Mat_SeqBAIJ*)(B)->data;
383               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
384               ba=b->a;
385             } else col += in[j]%bs;
386           } else col = in[j];
387           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
388           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
389           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
390         }
391       }
392     } else {  /* off processor entry */
393       if (!baij->donotstash) {
394         n_loc = 0;
395         for (j=0; j<n; j++){
396           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
397           in_loc[n_loc] = in[j];
398           if (roworiented) {
399             v_loc[n_loc] = v[i*n+j];
400           } else {
401             v_loc[n_loc] = v[j*m+i];
402           }
403           n_loc++;
404         }
405         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
406       }
407     }
408   }
409 
410   if(!baij->donotstash){
411     ierr = PetscFree(in_loc);CHKERRQ(ierr);
412     ierr = PetscFree(v_loc);CHKERRQ(ierr);
413   }
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
419 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
420 {
421   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
422   const MatScalar *value;
423   MatScalar       *barray=baij->barray;
424   PetscTruth      roworiented = baij->roworiented;
425   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
426   int             rend=baij->rend,cstart=baij->cstart,stepval;
427   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
428 
429   PetscFunctionBegin;
430   if(!barray) {
431     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
432     baij->barray = barray;
433   }
434 
435   if (roworiented) {
436     stepval = (n-1)*bs;
437   } else {
438     stepval = (m-1)*bs;
439   }
440   for (i=0; i<m; i++) {
441     if (im[i] < 0) continue;
442 #if defined(PETSC_USE_BOPT_g)
443     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
444 #endif
445     if (im[i] >= rstart && im[i] < rend) {
446       row = im[i] - rstart;
447       for (j=0; j<n; j++) {
448         /* If NumCol = 1 then a copy is not required */
449         if ((roworiented) && (n == 1)) {
450           barray = (MatScalar*) v + i*bs2;
451         } else if((!roworiented) && (m == 1)) {
452           barray = (MatScalar*) v + j*bs2;
453         } else { /* Here a copy is required */
454           if (roworiented) {
455             value = v + i*(stepval+bs)*bs + j*bs;
456           } else {
457             value = v + j*(stepval+bs)*bs + i*bs;
458           }
459           for (ii=0; ii<bs; ii++,value+=stepval) {
460             for (jj=0; jj<bs; jj++) {
461               *barray++  = *value++;
462             }
463           }
464           barray -=bs2;
465         }
466 
467         if (in[j] >= cstart && in[j] < cend){
468           col  = in[j] - cstart;
469           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
470         }
471         else if (in[j] < 0) continue;
472 #if defined(PETSC_USE_BOPT_g)
473         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
474 #endif
475         else {
476           if (mat->was_assembled) {
477             if (!baij->colmap) {
478               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
479             }
480 
481 #if defined(PETSC_USE_BOPT_g)
482 #if defined (PETSC_USE_CTABLE)
483             { int data;
484               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
485               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
486             }
487 #else
488             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
489 #endif
490 #endif
491 #if defined (PETSC_USE_CTABLE)
492 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
493             col  = (col - 1)/bs;
494 #else
495             col = (baij->colmap[in[j]] - 1)/bs;
496 #endif
497             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
498               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
499               col =  in[j];
500             }
501           }
502           else col = in[j];
503           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
504         }
505       }
506     } else {
507       if (!baij->donotstash) {
508         if (roworiented) {
509           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
510         } else {
511           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
512         }
513       }
514     }
515   }
516   PetscFunctionReturn(0);
517 }
518 
519 #define HASH_KEY 0.6180339887
520 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
521 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
522 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
523 #undef __FUNCT__
524 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
525 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
526 {
527   PetscFunctionBegin;
528   SETERRQ(1,"Function not yet written for SBAIJ format");
529   /* PetscFunctionReturn(0); */
530 }
531 
532 #undef __FUNCT__
533 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
534 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
535 {
536   PetscFunctionBegin;
537   SETERRQ(1,"Function not yet written for SBAIJ format");
538   /* PetscFunctionReturn(0); */
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatGetValues_MPISBAIJ"
543 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
544 {
545   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
546   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
547   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
552     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
553     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
554       row = idxm[i] - bsrstart;
555       for (j=0; j<n; j++) {
556         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
557         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
558         if (idxn[j] >= bscstart && idxn[j] < bscend){
559           col = idxn[j] - bscstart;
560           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
561         } else {
562           if (!baij->colmap) {
563             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
564           }
565 #if defined (PETSC_USE_CTABLE)
566           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
567           data --;
568 #else
569           data = baij->colmap[idxn[j]/bs]-1;
570 #endif
571           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
572           else {
573             col  = data + idxn[j]%bs;
574             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
575           }
576         }
577       }
578     } else {
579       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
580     }
581   }
582  PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatNorm_MPISBAIJ"
587 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
588 {
589   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
590   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
591   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
592   int        ierr;
593   PetscReal  sum[2],*lnorm2;
594 
595   PetscFunctionBegin;
596   if (baij->size == 1) {
597     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
598   } else {
599     if (type == NORM_FROBENIUS) {
600       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
601       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
602       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
603       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
605       /*
606       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
607       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
608       */
609       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
610       /*
611       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
612       PetscSynchronizedFlush(mat->comm); */
613 
614       *norm = sqrt(sum[0] + 2*sum[1]);
615       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
616     } else {
617       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
618     }
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 /*
624   Creates the hash table, and sets the table
625   This table is created only once.
626   If new entried need to be added to the matrix
627   then the hash table has to be destroyed and
628   recreated.
629 */
630 #undef __FUNCT__
631 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
632 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
633 {
634   PetscFunctionBegin;
635   SETERRQ(1,"Function not yet written for SBAIJ format");
636   /* PetscFunctionReturn(0); */
637 }
638 
639 #undef __FUNCT__
640 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
641 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
642 {
643   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
644   int         ierr,nstash,reallocs;
645   InsertMode  addv;
646 
647   PetscFunctionBegin;
648   if (baij->donotstash) {
649     PetscFunctionReturn(0);
650   }
651 
652   /* make sure all processors are either in INSERTMODE or ADDMODE */
653   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
654   if (addv == (ADD_VALUES|INSERT_VALUES)) {
655     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
656   }
657   mat->insertmode = addv; /* in case this processor had no cache */
658 
659   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
660   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
663   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
664   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNCT__
669 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
670 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
673   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
674   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
675   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
676   int         *row,*col,other_disassembled;
677   PetscTruth  r1,r2,r3;
678   MatScalar   *val;
679   InsertMode  addv = mat->insertmode;
680 
681   PetscFunctionBegin;
682 
683   if (!baij->donotstash) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       /*
687       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
688       PetscSynchronizedFlush(mat->comm);
689       */
690       if (!flg) break;
691 
692       for (i=0; i<n;) {
693         /* Now identify the consecutive vals belonging to the same row */
694         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
695         if (j < n) ncols = j-i;
696         else       ncols = n-i;
697         /* Now assemble all these values with a single function call */
698         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
699         i = j;
700       }
701     }
702     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
703     /* Now process the block-stash. Since the values are stashed column-oriented,
704        set the roworiented flag to column oriented, and after MatSetValues()
705        restore the original flags */
706     r1 = baij->roworiented;
707     r2 = a->roworiented;
708     r3 = b->roworiented;
709     baij->roworiented = PETSC_FALSE;
710     a->roworiented    = PETSC_FALSE;
711     b->roworiented    = PETSC_FALSE;
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n;) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
719         if (j < n) ncols = j-i;
720         else       ncols = n-i;
721         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
722         i = j;
723       }
724     }
725     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
726     baij->roworiented = r1;
727     a->roworiented    = r2;
728     b->roworiented    = r3;
729   }
730 
731   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled, if so we must
735      also disassemble ourselfs, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
741     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746 
747   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
748     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
749   }
750   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
752 
753 #if defined(PETSC_USE_BOPT_g)
754   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
755     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
756     baij->ht_total_ct  = 0;
757     baij->ht_insert_ct = 0;
758   }
759 #endif
760   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
761     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
762     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
763     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
764   }
765 
766   if (baij->rowvalues) {
767     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
768     baij->rowvalues = 0;
769   }
770 
771   PetscFunctionReturn(0);
772 }
773 
774 #undef __FUNCT__
775 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
776 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
777 {
778   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
779   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
780   PetscTruth        isascii,isdraw;
781   PetscViewer       sviewer;
782   PetscViewerFormat format;
783 
784   PetscFunctionBegin;
785   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
786   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
787   if (isascii) {
788     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
789     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
790       MatInfo info;
791       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
792       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
793       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
794               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
795               baij->bs,(int)info.memory);CHKERRQ(ierr);
796       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
797       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
798       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
799       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
800       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
801       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
802       PetscFunctionReturn(0);
803     } else if (format == PETSC_VIEWER_ASCII_INFO) {
804       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
805       PetscFunctionReturn(0);
806     }
807   }
808 
809   if (isdraw) {
810     PetscDraw       draw;
811     PetscTruth isnull;
812     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
813     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
814   }
815 
816   if (size == 1) {
817     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
818     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
819   } else {
820     /* assemble the entire matrix onto first processor. */
821     Mat         A;
822     Mat_SeqSBAIJ *Aloc;
823     Mat_SeqBAIJ *Bloc;
824     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
825     MatScalar   *a;
826 
827     if (!rank) {
828       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
829     } else {
830       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
831     }
832     PetscLogObjectParent(mat,A);
833 
834     /* copy over the A part */
835     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
836     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
837     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
838 
839     for (i=0; i<mbs; i++) {
840       rvals[0] = bs*(baij->rstart + i);
841       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
842       for (j=ai[i]; j<ai[i+1]; j++) {
843         col = (baij->cstart+aj[j])*bs;
844         for (k=0; k<bs; k++) {
845           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
846           col++; a += bs;
847         }
848       }
849     }
850     /* copy over the B part */
851     Bloc = (Mat_SeqBAIJ*)baij->B->data;
852     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
853     for (i=0; i<mbs; i++) {
854       rvals[0] = bs*(baij->rstart + i);
855       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
856       for (j=ai[i]; j<ai[i+1]; j++) {
857         col = baij->garray[aj[j]]*bs;
858         for (k=0; k<bs; k++) {
859           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
860           col++; a += bs;
861         }
862       }
863     }
864     ierr = PetscFree(rvals);CHKERRQ(ierr);
865     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
866     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
867     /*
868        Everyone has to call to draw the matrix since the graphics waits are
869        synchronized across all processors that share the PetscDraw object
870     */
871     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
872     if (!rank) {
873       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
874       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
875     }
876     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
877     ierr = MatDestroy(A);CHKERRQ(ierr);
878   }
879   PetscFunctionReturn(0);
880 }
881 
882 #undef __FUNCT__
883 #define __FUNCT__ "MatView_MPISBAIJ"
884 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
885 {
886   int        ierr;
887   PetscTruth isascii,isdraw,issocket,isbinary;
888 
889   PetscFunctionBegin;
890   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
891   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
892   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
893   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
894   if (isascii || isdraw || issocket || isbinary) {
895     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
896   } else {
897     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
898   }
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatDestroy_MPISBAIJ"
904 int MatDestroy_MPISBAIJ(Mat mat)
905 {
906   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
907   int         ierr;
908 
909   PetscFunctionBegin;
910 #if defined(PETSC_USE_LOG)
911   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
912 #endif
913   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
914   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
915   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
916   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
917   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
918 #if defined (PETSC_USE_CTABLE)
919   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
920 #else
921   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
922 #endif
923   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
924   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
925   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
926   if (baij->slvec0) {
927     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
928     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
929   }
930   if (baij->slvec1) {
931     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
932     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
933     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
934   }
935   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
936   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
937   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
938   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
939 #if defined(PETSC_USE_MAT_SINGLE)
940   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
941 #endif
942   ierr = PetscFree(baij);CHKERRQ(ierr);
943   PetscFunctionReturn(0);
944 }
945 
946 #undef __FUNCT__
947 #define __FUNCT__ "MatMult_MPISBAIJ"
948 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
949 {
950   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
951   int         ierr,nt,mbs=a->mbs,bs=a->bs;
952   PetscScalar *x,*from,zero=0.0;
953 
954   PetscFunctionBegin;
955   /*
956   PetscSynchronizedPrintf(A->comm," _1comm is called ...\n");
957   PetscSynchronizedFlush(A->comm);
958   */
959   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
960   if (nt != A->n) {
961     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
962   }
963   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
964   if (nt != A->m) {
965     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
966   }
967 
968   /* diagonal part */
969   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
970   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
971 
972   /* subdiagonal part */
973   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
974 
975   /* copy x into the vec slvec0 */
976   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
977   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
978   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
979   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
980 
981   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
982   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
983   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
984 
985   /* supperdiagonal part */
986   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
987 
988   PetscFunctionReturn(0);
989 }
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
993 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
994 {
995   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
996   int         ierr,nt;
997 
998   PetscFunctionBegin;
999   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1000   if (nt != A->n) {
1001     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1002   }
1003   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1004   if (nt != A->m) {
1005     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1006   }
1007 
1008   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1009   /* do diagonal part */
1010   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1011   /* do supperdiagonal part */
1012   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1013   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1014   /* do subdiagonal part */
1015   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1016   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1017   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1018 
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1024 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1025 {
1026   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1027   int          ierr,mbs=a->mbs,bs=a->bs;
1028   PetscScalar  *x,*from,zero=0.0;
1029 
1030   PetscFunctionBegin;
1031   /*
1032   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1033   PetscSynchronizedFlush(A->comm);
1034   */
1035   /* diagonal part */
1036   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1037   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1038 
1039   /* subdiagonal part */
1040   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1041 
1042   /* copy x into the vec slvec0 */
1043   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1044   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1045   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1046   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1047 
1048   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1049   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1050   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1051 
1052   /* supperdiagonal part */
1053   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1054 
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1060 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1061 {
1062   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1063   int        ierr;
1064 
1065   PetscFunctionBegin;
1066   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1067   /* do diagonal part */
1068   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1069   /* do supperdiagonal part */
1070   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1071   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1072 
1073   /* do subdiagonal part */
1074   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1075   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1076   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1077 
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1083 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1084 {
1085   int ierr;
1086 
1087   PetscFunctionBegin;
1088   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1094 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1095 {
1096   int ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 /*
1104   This only works correctly for square matrices where the subblock A->A is the
1105    diagonal block
1106 */
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1109 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1110 {
1111   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1112   int         ierr;
1113 
1114   PetscFunctionBegin;
1115   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1116   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatScale_MPISBAIJ"
1122 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1123 {
1124   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1125   int         ierr;
1126 
1127   PetscFunctionBegin;
1128   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1129   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1135 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1136 {
1137   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1138   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1139   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1140   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1141   int            *cmap,*idx_p,cstart = mat->cstart;
1142 
1143   PetscFunctionBegin;
1144   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1145   mat->getrowactive = PETSC_TRUE;
1146 
1147   if (!mat->rowvalues && (idx || v)) {
1148     /*
1149         allocate enough space to hold information from the longest row.
1150     */
1151     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1152     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1153     int     max = 1,mbs = mat->mbs,tmp;
1154     for (i=0; i<mbs; i++) {
1155       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1156       if (max < tmp) { max = tmp; }
1157     }
1158     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1159     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1160   }
1161 
1162   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1163   lrow = row - brstart;  /* local row index */
1164 
1165   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1166   if (!v)   {pvA = 0; pvB = 0;}
1167   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1168   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1169   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1170   nztot = nzA + nzB;
1171 
1172   cmap  = mat->garray;
1173   if (v  || idx) {
1174     if (nztot) {
1175       /* Sort by increasing column numbers, assuming A and B already sorted */
1176       int imark = -1;
1177       if (v) {
1178         *v = v_p = mat->rowvalues;
1179         for (i=0; i<nzB; i++) {
1180           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1181           else break;
1182         }
1183         imark = i;
1184         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1185         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1186       }
1187       if (idx) {
1188         *idx = idx_p = mat->rowindices;
1189         if (imark > -1) {
1190           for (i=0; i<imark; i++) {
1191             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1192           }
1193         } else {
1194           for (i=0; i<nzB; i++) {
1195             if (cmap[cworkB[i]/bs] < cstart)
1196               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1197             else break;
1198           }
1199           imark = i;
1200         }
1201         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1202         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1203       }
1204     } else {
1205       if (idx) *idx = 0;
1206       if (v)   *v   = 0;
1207     }
1208   }
1209   *nz = nztot;
1210   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1211   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1212   PetscFunctionReturn(0);
1213 }
1214 
1215 #undef __FUNCT__
1216 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1217 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1218 {
1219   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1220 
1221   PetscFunctionBegin;
1222   if (baij->getrowactive == PETSC_FALSE) {
1223     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1224   }
1225   baij->getrowactive = PETSC_FALSE;
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1231 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1232 {
1233   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1234 
1235   PetscFunctionBegin;
1236   *bs = baij->bs;
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1242 int MatZeroEntries_MPISBAIJ(Mat A)
1243 {
1244   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1245   int         ierr;
1246 
1247   PetscFunctionBegin;
1248   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1249   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1255 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1256 {
1257   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1258   Mat         A = a->A,B = a->B;
1259   int         ierr;
1260   PetscReal   isend[5],irecv[5];
1261 
1262   PetscFunctionBegin;
1263   info->block_size     = (PetscReal)a->bs;
1264   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1265   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1266   isend[3] = info->memory;  isend[4] = info->mallocs;
1267   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1268   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1269   isend[3] += info->memory;  isend[4] += info->mallocs;
1270   if (flag == MAT_LOCAL) {
1271     info->nz_used      = isend[0];
1272     info->nz_allocated = isend[1];
1273     info->nz_unneeded  = isend[2];
1274     info->memory       = isend[3];
1275     info->mallocs      = isend[4];
1276   } else if (flag == MAT_GLOBAL_MAX) {
1277     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1278     info->nz_used      = irecv[0];
1279     info->nz_allocated = irecv[1];
1280     info->nz_unneeded  = irecv[2];
1281     info->memory       = irecv[3];
1282     info->mallocs      = irecv[4];
1283   } else if (flag == MAT_GLOBAL_SUM) {
1284     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1285     info->nz_used      = irecv[0];
1286     info->nz_allocated = irecv[1];
1287     info->nz_unneeded  = irecv[2];
1288     info->memory       = irecv[3];
1289     info->mallocs      = irecv[4];
1290   } else {
1291     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1292   }
1293   info->rows_global       = (PetscReal)A->M;
1294   info->columns_global    = (PetscReal)A->N;
1295   info->rows_local        = (PetscReal)A->m;
1296   info->columns_local     = (PetscReal)A->N;
1297   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1298   info->fill_ratio_needed = 0;
1299   info->factor_mallocs    = 0;
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1305 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1306 {
1307   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1308   int         ierr;
1309 
1310   PetscFunctionBegin;
1311   switch (op) {
1312   case MAT_NO_NEW_NONZERO_LOCATIONS:
1313   case MAT_YES_NEW_NONZERO_LOCATIONS:
1314   case MAT_COLUMNS_UNSORTED:
1315   case MAT_COLUMNS_SORTED:
1316   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1317   case MAT_KEEP_ZEROED_ROWS:
1318   case MAT_NEW_NONZERO_LOCATION_ERR:
1319     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1320     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1321     break;
1322   case MAT_ROW_ORIENTED:
1323     a->roworiented = PETSC_TRUE;
1324     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1325     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1326     break;
1327   case MAT_ROWS_SORTED:
1328   case MAT_ROWS_UNSORTED:
1329   case MAT_YES_NEW_DIAGONALS:
1330     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1331     break;
1332   case MAT_COLUMN_ORIENTED:
1333     a->roworiented = PETSC_FALSE;
1334     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1335     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1336     break;
1337   case MAT_IGNORE_OFF_PROC_ENTRIES:
1338     a->donotstash = PETSC_TRUE;
1339     break;
1340   case MAT_NO_NEW_DIAGONALS:
1341     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1342   case MAT_USE_HASH_TABLE:
1343     a->ht_flag = PETSC_TRUE;
1344     break;
1345   case MAT_NOT_SYMMETRIC:
1346   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1347   case MAT_HERMITIAN:
1348     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1349   case MAT_SYMMETRIC:
1350   case MAT_STRUCTURALLY_SYMMETRIC:
1351   case MAT_NOT_HERMITIAN:
1352   case MAT_SYMMETRY_ETERNAL:
1353   case MAT_NOT_SYMMETRY_ETERNAL:
1354     break;
1355   default:
1356     SETERRQ(PETSC_ERR_SUP,"unknown option");
1357   }
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 #undef __FUNCT__
1362 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1363 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1364 {
1365   int ierr;
1366   PetscFunctionBegin;
1367   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1373 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1374 {
1375   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1376   Mat         a = baij->A,b = baij->B;
1377   int         ierr,s1,s2,s3;
1378 
1379   PetscFunctionBegin;
1380   if (ll != rr) {
1381     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1382   }
1383   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1384   if (rr) {
1385     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1386     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1387     /* Overlap communication with computation. */
1388     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1389     /*} if (ll) { */
1390     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1391     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1392     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1393     /* } */
1394   /* scale  the diagonal block */
1395   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1396 
1397   /* if (rr) { */
1398     /* Do a scatter end and then right scale the off-diagonal block */
1399     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1400     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1401   }
1402 
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1408 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1409 {
1410   PetscFunctionBegin;
1411   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1416 int MatPrintHelp_MPISBAIJ(Mat A)
1417 {
1418   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1419   MPI_Comm    comm = A->comm;
1420   static int  called = 0;
1421   int         ierr;
1422 
1423   PetscFunctionBegin;
1424   if (!a->rank) {
1425     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1426   }
1427   if (called) {PetscFunctionReturn(0);} else called = 1;
1428   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1429   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1435 int MatSetUnfactored_MPISBAIJ(Mat A)
1436 {
1437   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1438   int         ierr;
1439 
1440   PetscFunctionBegin;
1441   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1446 
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatEqual_MPISBAIJ"
1449 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1450 {
1451   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1452   Mat         a,b,c,d;
1453   PetscTruth  flg;
1454   int         ierr;
1455 
1456   PetscFunctionBegin;
1457   a = matA->A; b = matA->B;
1458   c = matB->A; d = matB->B;
1459 
1460   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1461   if (flg == PETSC_TRUE) {
1462     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1463   }
1464   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1470 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1471 {
1472   int        ierr;
1473 
1474   PetscFunctionBegin;
1475   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 #undef __FUNCT__
1480 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1481 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1482 {
1483   int        i,ierr;
1484   PetscTruth flg;
1485 
1486   for (i=0; i<n; i++) {
1487     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1488     if (!flg) {
1489       SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices");
1490     }
1491   }
1492   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 
1497 /* -------------------------------------------------------------------*/
1498 static struct _MatOps MatOps_Values = {
1499        MatSetValues_MPISBAIJ,
1500        MatGetRow_MPISBAIJ,
1501        MatRestoreRow_MPISBAIJ,
1502        MatMult_MPISBAIJ,
1503 /* 4*/ MatMultAdd_MPISBAIJ,
1504        MatMultTranspose_MPISBAIJ,
1505        MatMultTransposeAdd_MPISBAIJ,
1506        0,
1507        0,
1508        0,
1509 /*10*/ 0,
1510        0,
1511        0,
1512        MatRelax_MPISBAIJ,
1513        MatTranspose_MPISBAIJ,
1514 /*15*/ MatGetInfo_MPISBAIJ,
1515        MatEqual_MPISBAIJ,
1516        MatGetDiagonal_MPISBAIJ,
1517        MatDiagonalScale_MPISBAIJ,
1518        MatNorm_MPISBAIJ,
1519 /*20*/ MatAssemblyBegin_MPISBAIJ,
1520        MatAssemblyEnd_MPISBAIJ,
1521        0,
1522        MatSetOption_MPISBAIJ,
1523        MatZeroEntries_MPISBAIJ,
1524 /*25*/ MatZeroRows_MPISBAIJ,
1525        0,
1526        0,
1527        0,
1528        0,
1529 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1530        0,
1531        0,
1532        0,
1533        0,
1534 /*35*/ MatDuplicate_MPISBAIJ,
1535        0,
1536        0,
1537        0,
1538        0,
1539 /*40*/ 0,
1540        MatGetSubMatrices_MPISBAIJ,
1541        0,
1542        MatGetValues_MPISBAIJ,
1543        0,
1544 /*45*/ MatPrintHelp_MPISBAIJ,
1545        MatScale_MPISBAIJ,
1546        0,
1547        0,
1548        0,
1549 /*50*/ MatGetBlockSize_MPISBAIJ,
1550        0,
1551        0,
1552        0,
1553        0,
1554 /*55*/ 0,
1555        0,
1556        MatSetUnfactored_MPISBAIJ,
1557        0,
1558        MatSetValuesBlocked_MPISBAIJ,
1559 /*60*/ 0,
1560        0,
1561        0,
1562        MatGetPetscMaps_Petsc,
1563        0,
1564 /*65*/ 0,
1565        0,
1566        0,
1567        0,
1568        0,
1569 /*70*/ MatGetRowMax_MPISBAIJ,
1570        0,
1571        0,
1572        0,
1573        0,
1574 /*75*/ 0,
1575        0,
1576        0,
1577        0,
1578        0,
1579 /*80*/ 0,
1580        0,
1581        0,
1582        0,
1583 /*85*/ MatLoad_MPISBAIJ
1584 };
1585 
1586 
1587 EXTERN_C_BEGIN
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1590 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1591 {
1592   PetscFunctionBegin;
1593   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1594   *iscopy = PETSC_FALSE;
1595   PetscFunctionReturn(0);
1596 }
1597 EXTERN_C_END
1598 
1599 EXTERN_C_BEGIN
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1602 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1603 {
1604   Mat_MPISBAIJ *b;
1605   int          ierr,i,mbs,Mbs;
1606 
1607   PetscFunctionBegin;
1608   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1609 
1610   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1611   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1612   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1613   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1614   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1615   if (d_nnz) {
1616     for (i=0; i<B->m/bs; i++) {
1617       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1618     }
1619   }
1620   if (o_nnz) {
1621     for (i=0; i<B->m/bs; i++) {
1622       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1623     }
1624   }
1625   B->preallocated = PETSC_TRUE;
1626   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1627   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1628   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1629   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1630 
1631   b   = (Mat_MPISBAIJ*)B->data;
1632   mbs = B->m/bs;
1633   Mbs = B->M/bs;
1634   if (mbs*bs != B->m) {
1635     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
1636   }
1637 
1638   b->bs  = bs;
1639   b->bs2 = bs*bs;
1640   b->mbs = mbs;
1641   b->nbs = mbs;
1642   b->Mbs = Mbs;
1643   b->Nbs = Mbs;
1644 
1645   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1646   b->rowners[0]    = 0;
1647   for (i=2; i<=b->size; i++) {
1648     b->rowners[i] += b->rowners[i-1];
1649   }
1650   b->rstart    = b->rowners[b->rank];
1651   b->rend      = b->rowners[b->rank+1];
1652   b->cstart    = b->rstart;
1653   b->cend      = b->rend;
1654   for (i=0; i<=b->size; i++) {
1655     b->rowners_bs[i] = b->rowners[i]*bs;
1656   }
1657   b->rstart_bs = b-> rstart*bs;
1658   b->rend_bs   = b->rend*bs;
1659 
1660   b->cstart_bs = b->cstart*bs;
1661   b->cend_bs   = b->cend*bs;
1662 
1663 
1664   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1665   PetscLogObjectParent(B,b->A);
1666   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1667   PetscLogObjectParent(B,b->B);
1668 
1669   /* build cache for off array entries formed */
1670   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1671 
1672   PetscFunctionReturn(0);
1673 }
1674 EXTERN_C_END
1675 
1676 /*MC
1677    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1678    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1679 
1680    Options Database Keys:
1681 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1682 
1683   Level: beginner
1684 
1685 .seealso: MatCreateMPISBAIJ
1686 M*/
1687 
1688 EXTERN_C_BEGIN
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatCreate_MPISBAIJ"
1691 int MatCreate_MPISBAIJ(Mat B)
1692 {
1693   Mat_MPISBAIJ *b;
1694   int          ierr;
1695   PetscTruth   flg;
1696 
1697   PetscFunctionBegin;
1698 
1699   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1700   B->data = (void*)b;
1701   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1702   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1703 
1704   B->ops->destroy    = MatDestroy_MPISBAIJ;
1705   B->ops->view       = MatView_MPISBAIJ;
1706   B->mapping    = 0;
1707   B->factor     = 0;
1708   B->assembled  = PETSC_FALSE;
1709 
1710   B->insertmode = NOT_SET_VALUES;
1711   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1712   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1713 
1714   /* build local table of row and column ownerships */
1715   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1716   b->cowners    = b->rowners + b->size + 2;
1717   b->rowners_bs = b->cowners + b->size + 2;
1718   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1719 
1720   /* build cache for off array entries formed */
1721   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1722   b->donotstash  = PETSC_FALSE;
1723   b->colmap      = PETSC_NULL;
1724   b->garray      = PETSC_NULL;
1725   b->roworiented = PETSC_TRUE;
1726 
1727 #if defined(PETSC_USE_MAT_SINGLE)
1728   /* stuff for MatSetValues_XXX in single precision */
1729   b->setvalueslen     = 0;
1730   b->setvaluescopy    = PETSC_NULL;
1731 #endif
1732 
1733   /* stuff used in block assembly */
1734   b->barray       = 0;
1735 
1736   /* stuff used for matrix vector multiply */
1737   b->lvec         = 0;
1738   b->Mvctx        = 0;
1739   b->slvec0       = 0;
1740   b->slvec0b      = 0;
1741   b->slvec1       = 0;
1742   b->slvec1a      = 0;
1743   b->slvec1b      = 0;
1744   b->sMvctx       = 0;
1745 
1746   /* stuff for MatGetRow() */
1747   b->rowindices   = 0;
1748   b->rowvalues    = 0;
1749   b->getrowactive = PETSC_FALSE;
1750 
1751   /* hash table stuff */
1752   b->ht           = 0;
1753   b->hd           = 0;
1754   b->ht_size      = 0;
1755   b->ht_flag      = PETSC_FALSE;
1756   b->ht_fact      = 0;
1757   b->ht_total_ct  = 0;
1758   b->ht_insert_ct = 0;
1759 
1760   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1761   if (flg) {
1762     PetscReal fact = 1.39;
1763     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1764     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1765     if (fact <= 1.0) fact = 1.39;
1766     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1767     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1768   }
1769   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1770                                      "MatStoreValues_MPISBAIJ",
1771                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1772   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1773                                      "MatRetrieveValues_MPISBAIJ",
1774                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1775   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1776                                      "MatGetDiagonalBlock_MPISBAIJ",
1777                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1778   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1779                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1780                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1781   PetscFunctionReturn(0);
1782 }
1783 EXTERN_C_END
1784 
1785 /*MC
1786    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1787 
1788    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1789    and MATMPISBAIJ otherwise.
1790 
1791    Options Database Keys:
1792 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1793 
1794   Level: beginner
1795 
1796 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1797 M*/
1798 
1799 EXTERN_C_BEGIN
1800 #undef __FUNCT__
1801 #define __FUNCT__ "MatCreate_SBAIJ"
1802 int MatCreate_SBAIJ(Mat A) {
1803   int ierr,size;
1804 
1805   PetscFunctionBegin;
1806   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1807   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1808   if (size == 1) {
1809     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1810   } else {
1811     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1812   }
1813   PetscFunctionReturn(0);
1814 }
1815 EXTERN_C_END
1816 
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1819 /*@C
1820    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1821    the user should preallocate the matrix storage by setting the parameters
1822    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1823    performance can be increased by more than a factor of 50.
1824 
1825    Collective on Mat
1826 
1827    Input Parameters:
1828 +  A - the matrix
1829 .  bs   - size of blockk
1830 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1831            submatrix  (same for all local rows)
1832 .  d_nnz - array containing the number of block nonzeros in the various block rows
1833            in the upper triangular and diagonal part of the in diagonal portion of the local
1834            (possibly different for each block row) or PETSC_NULL.  You must leave room
1835            for the diagonal entry even if it is zero.
1836 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1837            submatrix (same for all local rows).
1838 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1839            off-diagonal portion of the local submatrix (possibly different for
1840            each block row) or PETSC_NULL.
1841 
1842 
1843    Options Database Keys:
1844 .   -mat_no_unroll - uses code that does not unroll the loops in the
1845                      block calculations (much slower)
1846 .   -mat_block_size - size of the blocks to use
1847 
1848    Notes:
1849 
1850    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1851    than it must be used on all processors that share the object for that argument.
1852 
1853    Storage Information:
1854    For a square global matrix we define each processor's diagonal portion
1855    to be its local rows and the corresponding columns (a square submatrix);
1856    each processor's off-diagonal portion encompasses the remainder of the
1857    local matrix (a rectangular submatrix).
1858 
1859    The user can specify preallocated storage for the diagonal part of
1860    the local submatrix with either d_nz or d_nnz (not both).  Set
1861    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1862    memory allocation.  Likewise, specify preallocated storage for the
1863    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1864 
1865    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1866    the figure below we depict these three local rows and all columns (0-11).
1867 
1868 .vb
1869            0 1 2 3 4 5 6 7 8 9 10 11
1870           -------------------
1871    row 3  |  o o o d d d o o o o o o
1872    row 4  |  o o o d d d o o o o o o
1873    row 5  |  o o o d d d o o o o o o
1874           -------------------
1875 .ve
1876 
1877    Thus, any entries in the d locations are stored in the d (diagonal)
1878    submatrix, and any entries in the o locations are stored in the
1879    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1880    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1881 
1882    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1883    plus the diagonal part of the d matrix,
1884    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1885    In general, for PDE problems in which most nonzeros are near the diagonal,
1886    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1887    or you will get TERRIBLE performance; see the users' manual chapter on
1888    matrices.
1889 
1890    Level: intermediate
1891 
1892 .keywords: matrix, block, aij, compressed row, sparse, parallel
1893 
1894 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1895 @*/
1896 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1897 {
1898   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1899 
1900   PetscFunctionBegin;
1901   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1902   if (f) {
1903     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1904   }
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 #undef __FUNCT__
1909 #define __FUNCT__ "MatCreateMPISBAIJ"
1910 /*@C
1911    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1912    (block compressed row).  For good matrix assembly performance
1913    the user should preallocate the matrix storage by setting the parameters
1914    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1915    performance can be increased by more than a factor of 50.
1916 
1917    Collective on MPI_Comm
1918 
1919    Input Parameters:
1920 +  comm - MPI communicator
1921 .  bs   - size of blockk
1922 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1923            This value should be the same as the local size used in creating the
1924            y vector for the matrix-vector product y = Ax.
1925 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1926            This value should be the same as the local size used in creating the
1927            x vector for the matrix-vector product y = Ax.
1928 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1929 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1930 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1931            submatrix  (same for all local rows)
1932 .  d_nnz - array containing the number of block nonzeros in the various block rows
1933            in the upper triangular portion of the in diagonal portion of the local
1934            (possibly different for each block block row) or PETSC_NULL.
1935            You must leave room for the diagonal entry even if it is zero.
1936 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1937            submatrix (same for all local rows).
1938 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1939            off-diagonal portion of the local submatrix (possibly different for
1940            each block row) or PETSC_NULL.
1941 
1942    Output Parameter:
1943 .  A - the matrix
1944 
1945    Options Database Keys:
1946 .   -mat_no_unroll - uses code that does not unroll the loops in the
1947                      block calculations (much slower)
1948 .   -mat_block_size - size of the blocks to use
1949 .   -mat_mpi - use the parallel matrix data structures even on one processor
1950                (defaults to using SeqBAIJ format on one processor)
1951 
1952    Notes:
1953    The user MUST specify either the local or global matrix dimensions
1954    (possibly both).
1955 
1956    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1957    than it must be used on all processors that share the object for that argument.
1958 
1959    Storage Information:
1960    For a square global matrix we define each processor's diagonal portion
1961    to be its local rows and the corresponding columns (a square submatrix);
1962    each processor's off-diagonal portion encompasses the remainder of the
1963    local matrix (a rectangular submatrix).
1964 
1965    The user can specify preallocated storage for the diagonal part of
1966    the local submatrix with either d_nz or d_nnz (not both).  Set
1967    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1968    memory allocation.  Likewise, specify preallocated storage for the
1969    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1970 
1971    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1972    the figure below we depict these three local rows and all columns (0-11).
1973 
1974 .vb
1975            0 1 2 3 4 5 6 7 8 9 10 11
1976           -------------------
1977    row 3  |  o o o d d d o o o o o o
1978    row 4  |  o o o d d d o o o o o o
1979    row 5  |  o o o d d d o o o o o o
1980           -------------------
1981 .ve
1982 
1983    Thus, any entries in the d locations are stored in the d (diagonal)
1984    submatrix, and any entries in the o locations are stored in the
1985    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1986    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1987 
1988    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1989    plus the diagonal part of the d matrix,
1990    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1991    In general, for PDE problems in which most nonzeros are near the diagonal,
1992    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1993    or you will get TERRIBLE performance; see the users' manual chapter on
1994    matrices.
1995 
1996    Level: intermediate
1997 
1998 .keywords: matrix, block, aij, compressed row, sparse, parallel
1999 
2000 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2001 @*/
2002 
2003 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2004 {
2005   int ierr,size;
2006 
2007   PetscFunctionBegin;
2008   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2009   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2010   if (size > 1) {
2011     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2012     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2013   } else {
2014     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2015     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2016   }
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 
2021 #undef __FUNCT__
2022 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2023 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2024 {
2025   Mat          mat;
2026   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2027   int          ierr,len=0;
2028 
2029   PetscFunctionBegin;
2030   *newmat       = 0;
2031   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2032   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
2033   mat->preallocated = PETSC_TRUE;
2034   a = (Mat_MPISBAIJ*)mat->data;
2035   a->bs  = oldmat->bs;
2036   a->bs2 = oldmat->bs2;
2037   a->mbs = oldmat->mbs;
2038   a->nbs = oldmat->nbs;
2039   a->Mbs = oldmat->Mbs;
2040   a->Nbs = oldmat->Nbs;
2041 
2042   a->rstart       = oldmat->rstart;
2043   a->rend         = oldmat->rend;
2044   a->cstart       = oldmat->cstart;
2045   a->cend         = oldmat->cend;
2046   a->size         = oldmat->size;
2047   a->rank         = oldmat->rank;
2048   a->donotstash   = oldmat->donotstash;
2049   a->roworiented  = oldmat->roworiented;
2050   a->rowindices   = 0;
2051   a->rowvalues    = 0;
2052   a->getrowactive = PETSC_FALSE;
2053   a->barray       = 0;
2054   a->rstart_bs    = oldmat->rstart_bs;
2055   a->rend_bs      = oldmat->rend_bs;
2056   a->cstart_bs    = oldmat->cstart_bs;
2057   a->cend_bs      = oldmat->cend_bs;
2058 
2059   /* hash table stuff */
2060   a->ht           = 0;
2061   a->hd           = 0;
2062   a->ht_size      = 0;
2063   a->ht_flag      = oldmat->ht_flag;
2064   a->ht_fact      = oldmat->ht_fact;
2065   a->ht_total_ct  = 0;
2066   a->ht_insert_ct = 0;
2067 
2068   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
2069   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2070   a->cowners    = a->rowners + a->size + 2;
2071   a->rowners_bs = a->cowners + a->size + 2;
2072   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2073   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2074   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2075   if (oldmat->colmap) {
2076 #if defined (PETSC_USE_CTABLE)
2077     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2078 #else
2079     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2080     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2081     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2082 #endif
2083   } else a->colmap = 0;
2084   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2085     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2086     PetscLogObjectMemory(mat,len*sizeof(int));
2087     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2088   } else a->garray = 0;
2089 
2090   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2091   PetscLogObjectParent(mat,a->lvec);
2092   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2093 
2094   PetscLogObjectParent(mat,a->Mvctx);
2095   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2096   PetscLogObjectParent(mat,a->A);
2097   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2098   PetscLogObjectParent(mat,a->B);
2099   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2100   *newmat = mat;
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 #include "petscsys.h"
2105 
2106 #undef __FUNCT__
2107 #define __FUNCT__ "MatLoad_MPISBAIJ"
2108 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2109 {
2110   Mat          A;
2111   int          i,nz,ierr,j,rstart,rend,fd;
2112   PetscScalar  *vals,*buf;
2113   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2114   MPI_Status   status;
2115   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2116   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2117   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2118   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2119   int          dcount,kmax,k,nzcount,tmp;
2120 
2121   PetscFunctionBegin;
2122   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2123 
2124   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2125   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2126   if (!rank) {
2127     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2128     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2129     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2130     if (header[3] < 0) {
2131       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2132     }
2133   }
2134 
2135   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2136   M = header[1]; N = header[2];
2137 
2138   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2139 
2140   /*
2141      This code adds extra rows to make sure the number of rows is
2142      divisible by the blocksize
2143   */
2144   Mbs        = M/bs;
2145   extra_rows = bs - M + bs*(Mbs);
2146   if (extra_rows == bs) extra_rows = 0;
2147   else                  Mbs++;
2148   if (extra_rows &&!rank) {
2149     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2150   }
2151 
2152   /* determine ownership of all rows */
2153   mbs        = Mbs/size + ((Mbs % size) > rank);
2154   m          = mbs*bs;
2155   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2156   browners   = rowners + size + 1;
2157   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2158   rowners[0] = 0;
2159   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2160   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2161   rstart = rowners[rank];
2162   rend   = rowners[rank+1];
2163 
2164   /* distribute row lengths to all processors */
2165   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2166   if (!rank) {
2167     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2168     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2169     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2170     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2171     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2172     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2173     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2174   } else {
2175     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2176   }
2177 
2178   if (!rank) {   /* procs[0] */
2179     /* calculate the number of nonzeros on each processor */
2180     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2181     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2182     for (i=0; i<size; i++) {
2183       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2184         procsnz[i] += rowlengths[j];
2185       }
2186     }
2187     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2188 
2189     /* determine max buffer needed and allocate it */
2190     maxnz = 0;
2191     for (i=0; i<size; i++) {
2192       maxnz = PetscMax(maxnz,procsnz[i]);
2193     }
2194     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2195 
2196     /* read in my part of the matrix column indices  */
2197     nz     = procsnz[0];
2198     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2199     mycols = ibuf;
2200     if (size == 1)  nz -= extra_rows;
2201     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2202     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2203 
2204     /* read in every ones (except the last) and ship off */
2205     for (i=1; i<size-1; i++) {
2206       nz   = procsnz[i];
2207       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2208       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2209     }
2210     /* read in the stuff for the last proc */
2211     if (size != 1) {
2212       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2213       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2214       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2215       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2216     }
2217     ierr = PetscFree(cols);CHKERRQ(ierr);
2218   } else {  /* procs[i], i>0 */
2219     /* determine buffer space needed for message */
2220     nz = 0;
2221     for (i=0; i<m; i++) {
2222       nz += locrowlens[i];
2223     }
2224     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2225     mycols = ibuf;
2226     /* receive message of column indices*/
2227     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2228     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2229     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2230   }
2231 
2232   /* loop over local rows, determining number of off diagonal entries */
2233   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2234   odlens   = dlens + (rend-rstart);
2235   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2236   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2237   masked1  = mask    + Mbs;
2238   masked2  = masked1 + Mbs;
2239   rowcount = 0; nzcount = 0;
2240   for (i=0; i<mbs; i++) {
2241     dcount  = 0;
2242     odcount = 0;
2243     for (j=0; j<bs; j++) {
2244       kmax = locrowlens[rowcount];
2245       for (k=0; k<kmax; k++) {
2246         tmp = mycols[nzcount++]/bs; /* block col. index */
2247         if (!mask[tmp]) {
2248           mask[tmp] = 1;
2249           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2250           else masked1[dcount++] = tmp; /* entry in diag portion */
2251         }
2252       }
2253       rowcount++;
2254     }
2255 
2256     dlens[i]  = dcount;  /* d_nzz[i] */
2257     odlens[i] = odcount; /* o_nzz[i] */
2258 
2259     /* zero out the mask elements we set */
2260     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2261     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2262   }
2263 
2264   /* create our matrix */
2265   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2266   ierr = MatSetType(A,type);CHKERRQ(ierr);
2267   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2268   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2269 
2270   if (!rank) {
2271     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2272     /* read in my part of the matrix numerical values  */
2273     nz = procsnz[0];
2274     vals = buf;
2275     mycols = ibuf;
2276     if (size == 1)  nz -= extra_rows;
2277     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2278     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2279 
2280     /* insert into matrix */
2281     jj      = rstart*bs;
2282     for (i=0; i<m; i++) {
2283       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2284       mycols += locrowlens[i];
2285       vals   += locrowlens[i];
2286       jj++;
2287     }
2288 
2289     /* read in other processors (except the last one) and ship out */
2290     for (i=1; i<size-1; i++) {
2291       nz   = procsnz[i];
2292       vals = buf;
2293       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2294       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2295     }
2296     /* the last proc */
2297     if (size != 1){
2298       nz   = procsnz[i] - extra_rows;
2299       vals = buf;
2300       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2301       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2302       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2303     }
2304     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2305 
2306   } else {
2307     /* receive numeric values */
2308     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2309 
2310     /* receive message of values*/
2311     vals   = buf;
2312     mycols = ibuf;
2313     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2314     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2315     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2316 
2317     /* insert into matrix */
2318     jj      = rstart*bs;
2319     for (i=0; i<m; i++) {
2320       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2321       mycols += locrowlens[i];
2322       vals   += locrowlens[i];
2323       jj++;
2324     }
2325   }
2326 
2327   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2328   ierr = PetscFree(buf);CHKERRQ(ierr);
2329   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2330   ierr = PetscFree(rowners);CHKERRQ(ierr);
2331   ierr = PetscFree(dlens);CHKERRQ(ierr);
2332   ierr = PetscFree(mask);CHKERRQ(ierr);
2333   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2334   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2335   *newmat = A;
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 #undef __FUNCT__
2340 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2341 /*@
2342    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2343 
2344    Input Parameters:
2345 .  mat  - the matrix
2346 .  fact - factor
2347 
2348    Collective on Mat
2349 
2350    Level: advanced
2351 
2352   Notes:
2353    This can also be set by the command line option: -mat_use_hash_table fact
2354 
2355 .keywords: matrix, hashtable, factor, HT
2356 
2357 .seealso: MatSetOption()
2358 @*/
2359 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2360 {
2361   PetscFunctionBegin;
2362   SETERRQ(1,"Function not yet written for SBAIJ format");
2363   /* PetscFunctionReturn(0); */
2364 }
2365 
2366 #undef __FUNCT__
2367 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2368 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2369 {
2370   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2371   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2372   PetscReal    atmp;
2373   PetscReal    *work,*svalues,*rvalues;
2374   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2375   int          rank,size,*rowners_bs,dest,count,source;
2376   PetscScalar  *va;
2377   MatScalar    *ba;
2378   MPI_Status   stat;
2379 
2380   PetscFunctionBegin;
2381   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2382   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2383 
2384   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2385   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2386 
2387   bs   = a->bs;
2388   mbs  = a->mbs;
2389   Mbs  = a->Mbs;
2390   ba   = b->a;
2391   bi   = b->i;
2392   bj   = b->j;
2393   /*
2394   PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2395   PetscSynchronizedFlush(A->comm);
2396   */
2397 
2398   /* find ownerships */
2399   rowners_bs = a->rowners_bs;
2400   /*
2401   if (!rank){
2402     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2403   }
2404   */
2405 
2406   /* each proc creates an array to be distributed */
2407   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2408   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2409 
2410   /* row_max for B */
2411   if (rank != size-1){
2412     for (i=0; i<mbs; i++) {
2413       ncols = bi[1] - bi[0]; bi++;
2414       brow  = bs*i;
2415       for (j=0; j<ncols; j++){
2416         bcol = bs*(*bj);
2417         for (kcol=0; kcol<bs; kcol++){
2418           col = bcol + kcol;                 /* local col index */
2419           col += rowners_bs[rank+1];      /* global col index */
2420           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2421           for (krow=0; krow<bs; krow++){
2422             atmp = PetscAbsScalar(*ba); ba++;
2423             row = brow + krow;    /* local row index */
2424             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2425             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2426             if (work[col] < atmp) work[col] = atmp;
2427           }
2428         }
2429         bj++;
2430       }
2431     }
2432     /*
2433       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2434       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2435       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2436       */
2437 
2438     /* send values to its owners */
2439     for (dest=rank+1; dest<size; dest++){
2440       svalues = work + rowners_bs[dest];
2441       count   = rowners_bs[dest+1]-rowners_bs[dest];
2442       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2443       /*
2444       PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2445       PetscSynchronizedFlush(A->comm);
2446       */
2447     }
2448   }
2449 
2450   /* receive values */
2451   if (rank){
2452     rvalues = work;
2453     count   = rowners_bs[rank+1]-rowners_bs[rank];
2454     for (source=0; source<rank; source++){
2455       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2456       /* process values */
2457       for (i=0; i<count; i++){
2458         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2459       }
2460       /*
2461       PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2462       PetscSynchronizedFlush(A->comm);
2463       */
2464     }
2465   }
2466 
2467   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2468   ierr = PetscFree(work);CHKERRQ(ierr);
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 #undef __FUNCT__
2473 #define __FUNCT__ "MatRelax_MPISBAIJ"
2474 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2475 {
2476   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2477   int            ierr,mbs=mat->mbs,bs=mat->bs;
2478   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2479   Vec            bb1;
2480 
2481   PetscFunctionBegin;
2482   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2483   if (bs > 1)
2484     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2485 
2486   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2487     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2488       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2489       its--;
2490     }
2491 
2492     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2493     while (its--){
2494 
2495       /* lower triangular part: slvec0b = - B^T*xx */
2496       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2497 
2498       /* copy xx into slvec0a */
2499       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2500       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2501       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2502       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2503 
2504       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2505 
2506       /* copy bb into slvec1a */
2507       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2508       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2509       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2510       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2511 
2512       /* set slvec1b = 0 */
2513       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2514 
2515       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2516       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2517       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2518       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2519 
2520       /* upper triangular part: bb1 = bb1 - B*x */
2521       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2522 
2523       /* local diagonal sweep */
2524       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2525     }
2526     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2527   } else {
2528     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2529   }
2530   PetscFunctionReturn(0);
2531 }
2532 
2533 #undef __FUNCT__
2534 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2535 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2536 {
2537   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2538   int            ierr;
2539   PetscScalar    mone=-1.0;
2540   Vec            lvec1,bb1;
2541 
2542   PetscFunctionBegin;
2543   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2544   if (mat->bs > 1)
2545     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2546 
2547   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2548     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2549       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2550       its--;
2551     }
2552 
2553     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2554     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2555     while (its--){
2556       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2557 
2558       /* lower diagonal part: bb1 = bb - B^T*xx */
2559       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2560       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2561 
2562       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2563       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2564       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2565 
2566       /* upper diagonal part: bb1 = bb1 - B*x */
2567       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2568       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2569 
2570       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2571 
2572       /* diagonal sweep */
2573       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2574     }
2575     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2576     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2577   } else {
2578     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2579   }
2580   PetscFunctionReturn(0);
2581 }
2582 
2583