1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 extern int DisAssemble_MPISBAIJ(Mat);
11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
13 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
18 extern int MatPrintHelp_SeqSBAIJ(Mat);
19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single-precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
28    converts the entries to single precision and then calls ..._MatScalar() to put them
29    into the single-precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
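/*
   A rough sketch of the wrapper pattern used when PETSC_USE_MAT_SINGLE is
   defined (see MatSetValues_MPISBAIJ() below for the actual code):

     MatSetValues(mat,m,im,n,in,v,addv)          -- v[] is PetscScalar (double)
       -> copy v[] into b->setvaluescopy[]       -- converted to MatScalar (single)
       -> MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,b->setvaluescopy,addv)

   so only the ..._MatScalar() routines ever touch the single-precision storage.
*/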
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 int MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
51   int          ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 int MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
66   int          ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 /*
76      Local utility routine that creates a mapping from the global column
77    number to the local number in the off-diagonal part of the local
78    storage of the matrix.  This is done in a non-scalable way since the
79    length of colmap equals the global matrix length.
80 */
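/*
   Illustration (hypothetical data, bs=1): if this process stores off-diagonal
   entries only for global block columns 2 and 5, then garray = {2,5} and

     colmap[2] = 1, colmap[5] = 2, colmap[j] = 0 otherwise

   i.e. colmap[global] - 1 gives the local block (scaled by bs when bs > 1,
   matching the "col = baij->colmap[...] - 1" arithmetic in the callers below),
   and a result of -1 flags a column not present on this process.
*/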
81 #undef __FUNCT__
82 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private"
83 static int CreateColmap_MPISBAIJ_Private(Mat mat)
84 {
85   PetscFunctionBegin;
86   SETERRQ(1,"Function not yet written for SBAIJ format");
87   /* PetscFunctionReturn(0); */
88 }
89 
90 #define CHUNKSIZE  10
91 
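/*
   The two macros below insert a single scalar into the block-compressed storage
   of the local diagonal part (A) or off-diagonal part (B). The common steps:
     1. locate block row row/bs and binary-search its sorted block columns for col/bs;
     2. if the block exists, add or insert the value at offset bs*cidx+ridx inside it;
     3. otherwise, if new nonzeros are allowed, grow the arrays by CHUNKSIZE blocks
        when the row is full, shift the trailing entries, and fill a zeroed new block.
*/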
92 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
93 { \
94  \
95     brow = row/bs;  \
96     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
97     rmax = aimax[brow]; nrow = ailen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto a_noinsert; \
113         } \
114       } \
115       if (a->nonew == 1) goto a_noinsert; \
116       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
117       if (nrow >= rmax) { \
118         /* there is no extra room in row, therefore enlarge */ \
119         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
120         MatScalar *new_a; \
121  \
122         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
123  \
124         /* malloc new storage space */ \
125         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
126         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
127         new_j = (int*)(new_a + bs2*new_nz); \
128         new_i = new_j + new_nz; \
129  \
130         /* copy over old data into new slots */ \
131         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
132         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
133         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
134         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
135         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
136         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
137         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
138         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
139                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
140         /* free up old matrix storage */ \
141         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
142         if (!a->singlemalloc) { \
143           ierr = PetscFree(a->i);CHKERRQ(ierr); \
144           ierr = PetscFree(a->j);CHKERRQ(ierr);\
145         } \
146         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
147         a->singlemalloc = PETSC_TRUE; \
148  \
149         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
150         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
151         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
152         a->s_maxnz += bs2*CHUNKSIZE; \
153         a->reallocs++; \
154         a->s_nz++; \
155       } \
156       N = nrow++ - 1;  \
157       /* shift up all the later entries in this row */ \
158       for (ii=N; ii>=_i; ii--) { \
159         rp[ii+1] = rp[ii]; \
160         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
161       } \
162       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
163       rp[_i]                      = bcol;  \
164       ap[bs2*_i + bs*cidx + ridx] = value;  \
165       a_noinsert:; \
166     ailen[brow] = nrow; \
167 }
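/* The B variant below is identical except that it works on the off-diagonal part
   (b->i, b->j, b->a) and updates b->nz/b->maxnz instead of a->s_nz/a->s_maxnz. */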
168 #ifndef MatSetValues_SeqSBAIJ_B_Private
169 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
170 { \
171     brow = row/bs;  \
172     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
173     rmax = bimax[brow]; nrow = bilen[brow]; \
174       bcol = col/bs; \
175       ridx = row % bs; cidx = col % bs; \
176       low = 0; high = nrow; \
177       while (high-low > 3) { \
178         t = (low+high)/2; \
179         if (rp[t] > bcol) high = t; \
180         else              low  = t; \
181       } \
182       for (_i=low; _i<high; _i++) { \
183         if (rp[_i] > bcol) break; \
184         if (rp[_i] == bcol) { \
185           bap  = ap +  bs2*_i + bs*cidx + ridx; \
186           if (addv == ADD_VALUES) *bap += value;  \
187           else                    *bap  = value;  \
188           goto b_noinsert; \
189         } \
190       } \
191       if (b->nonew == 1) goto b_noinsert; \
192       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
193       if (nrow >= rmax) { \
194         /* there is no extra room in row, therefore enlarge */ \
195         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
196         MatScalar *new_a; \
197  \
198         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
199  \
200         /* malloc new storage space */ \
201         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
202         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
203         new_j = (int*)(new_a + bs2*new_nz); \
204         new_i = new_j + new_nz; \
205  \
206         /* copy over old data into new slots */ \
207         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
208         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
209         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
210         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
211         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
212         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
213         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
214         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
215                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
216         /* free up old matrix storage */ \
217         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
218         if (!b->singlemalloc) { \
219           ierr = PetscFree(b->i);CHKERRQ(ierr); \
220           ierr = PetscFree(b->j);CHKERRQ(ierr); \
221         } \
222         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
223         b->singlemalloc = PETSC_TRUE; \
224  \
225         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
226         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
227         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
228         b->maxnz += bs2*CHUNKSIZE; \
229         b->reallocs++; \
230         b->nz++; \
231       } \
232       N = nrow++ - 1;  \
233       /* shift up all the later entries in this row */ \
234       for (ii=N; ii>=_i; ii--) { \
235         rp[ii+1] = rp[ii]; \
236         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
237       } \
238       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
239       rp[_i]                      = bcol;  \
240       ap[bs2*_i + bs*cidx + ridx] = value;  \
241       b_noinsert:; \
242     bilen[brow] = nrow; \
243 }
244 #endif
245 
246 #if defined(PETSC_USE_MAT_SINGLE)
247 #undef __FUNCT__
248 #define __FUNCT__ "MatSetValues_MPISBAIJ"
249 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
250 {
251   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
252   int          ierr,i,N = m*n;
253   MatScalar    *vsingle;
254 
255   PetscFunctionBegin;
256   if (N > b->setvalueslen) {
257     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
258     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
259     b->setvalueslen  = N;
260   }
261   vsingle = b->setvaluescopy;
262 
263   for (i=0; i<N; i++) {
264     vsingle[i] = v[i];
265   }
266   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
272 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
273 {
274   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
275   int          ierr,i,N = m*n*b->bs2;
276   MatScalar    *vsingle;
277 
278   PetscFunctionBegin;
279   if (N > b->setvalueslen) {
280     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
281     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
282     b->setvalueslen  = N;
283   }
284   vsingle = b->setvaluescopy;
285   for (i=0; i<N; i++) {
286     vsingle[i] = v[i];
287   }
288   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
294 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
295 {
296   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
297   int          ierr,i,N = m*n;
298   MatScalar    *vsingle;
299 
300   PetscFunctionBegin;
301   SETERRQ(1,"Function not yet written for SBAIJ format");
302   /* PetscFunctionReturn(0); */
303 }
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
307 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
308 {
309   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
310   int          ierr,i,N = m*n*b->bs2;
311   MatScalar    *vsingle;
312 
313   PetscFunctionBegin;
314   SETERRQ(1,"Function not yet written for SBAIJ format");
315   /* PetscFunctionReturn(0); */
316 }
317 #endif
318 
319 /* Only add/insert a(i,j) with i<=j (blocks).
320    Any a(i,j) with i>j input by the user is ignored.
321 */
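/* Example: with bs=2, setting entries (3,7) and (7,3) stores only (3,7): it lies
   in block (1,3) with 1 <= 3, whereas (7,3) lies in block (3,1) and is skipped by
   the im[i]/bs > in[j]/bs test below. */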
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
324 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
325 {
326   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
327   MatScalar    value;
328   PetscTruth   roworiented = baij->roworiented;
329   int          ierr,i,j,row,col;
330   int          rstart_orig=baij->rstart_bs;
331   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
332   int          cend_orig=baij->cend_bs,bs=baij->bs;
333 
334   /* Some Variables required in the macro */
335   Mat          A = baij->A;
336   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
337   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
338   MatScalar    *aa=a->a;
339 
340   Mat          B = baij->B;
341   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
342   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
343   MatScalar    *ba=b->a;
344 
345   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
346   int          low,high,t,ridx,cidx,bs2=a->bs2;
347   MatScalar    *ap,*bap;
348 
349   /* for stash */
350   int          n_loc, *in_loc=0;
351   MatScalar    *v_loc=0;
352 
353   PetscFunctionBegin;
354 
355   if(!baij->donotstash){
356     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
357     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
358   }
359 
360   for (i=0; i<m; i++) {
361     if (im[i] < 0) continue;
362 #if defined(PETSC_USE_BOPT_g)
363     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
364 #endif
365     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
366       row = im[i] - rstart_orig;              /* local row index */
367       for (j=0; j<n; j++) {
368         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
369         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
370           col = in[j] - cstart_orig;          /* local col index */
371           brow = row/bs; bcol = col/bs;
372           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
373           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
374           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
375           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
376         } else if (in[j] < 0) continue;
377 #if defined(PETSC_USE_BOPT_g)
378         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
379 #endif
380         else {  /* off-diag entry (B) */
381           if (mat->was_assembled) {
382             if (!baij->colmap) {
383               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
384             }
385 #if defined (PETSC_USE_CTABLE)
386             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
387             col  = col - 1;
388 #else
389             col = baij->colmap[in[j]/bs] - 1;
390 #endif
391             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
392               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
393               col =  in[j];
394               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
395               B = baij->B;
396               b = (Mat_SeqBAIJ*)(B)->data;
397               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
398               ba=b->a;
399             } else col += in[j]%bs;
400           } else col = in[j];
401           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
402           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
403           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
404         }
405       }
406     } else {  /* off processor entry */
407       if (!baij->donotstash) {
408         n_loc = 0;
409         for (j=0; j<n; j++){
410           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
411           in_loc[n_loc] = in[j];
412           if (roworiented) {
413             v_loc[n_loc] = v[i*n+j];
414           } else {
415             v_loc[n_loc] = v[j*m+i];
416           }
417           n_loc++;
418         }
419         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
420       }
421     }
422   }
423 
424   if(!baij->donotstash){
425     ierr = PetscFree(in_loc);CHKERRQ(ierr);
426     ierr = PetscFree(v_loc);CHKERRQ(ierr);
427   }
428   PetscFunctionReturn(0);
429 }
430 
431 #undef __FUNCT__
432 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
433 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
434 {
435   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
436   const MatScalar *value;
437   MatScalar       *barray=baij->barray;
438   PetscTruth      roworiented = baij->roworiented;
439   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
440   int             rend=baij->rend,cstart=baij->cstart,stepval;
441   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
442 
443   PetscFunctionBegin;
444   if(!barray) {
445     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
446     baij->barray = barray;
447   }
448 
449   if (roworiented) {
450     stepval = (n-1)*bs;
451   } else {
452     stepval = (m-1)*bs;
453   }
454   for (i=0; i<m; i++) {
455     if (im[i] < 0) continue;
456 #if defined(PETSC_USE_BOPT_g)
457     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
458 #endif
459     if (im[i] >= rstart && im[i] < rend) {
460       row = im[i] - rstart;
461       for (j=0; j<n; j++) {
462         /* when only one block column (row-oriented) or one block row (column-oriented) is passed, no copy is required */
463         if ((roworiented) && (n == 1)) {
464           barray = (MatScalar*) v + i*bs2;
465         } else if((!roworiented) && (m == 1)) {
466           barray = (MatScalar*) v + j*bs2;
467         } else { /* Here a copy is required */
468           if (roworiented) {
469             value = v + i*(stepval+bs)*bs + j*bs;
470           } else {
471             value = v + j*(stepval+bs)*bs + i*bs;
472           }
473           for (ii=0; ii<bs; ii++,value+=stepval) {
474             for (jj=0; jj<bs; jj++) {
475               *barray++  = *value++;
476             }
477           }
478           barray -=bs2;
479         }
480 
481         if (in[j] >= cstart && in[j] < cend){
482           col  = in[j] - cstart;
483           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
484         }
485         else if (in[j] < 0) continue;
486 #if defined(PETSC_USE_BOPT_g)
487         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
488 #endif
489         else {
490           if (mat->was_assembled) {
491             if (!baij->colmap) {
492               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
493             }
494 
495 #if defined(PETSC_USE_BOPT_g)
496 #if defined (PETSC_USE_CTABLE)
497             { int data;
498               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
499               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
500             }
501 #else
502             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
503 #endif
504 #endif
505 #if defined (PETSC_USE_CTABLE)
506 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
507             col  = (col - 1)/bs;
508 #else
509             col = (baij->colmap[in[j]] - 1)/bs;
510 #endif
511             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
512               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
513               col =  in[j];
514             }
515           }
516           else col = in[j];
517           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
518         }
519       }
520     } else {
521       if (!baij->donotstash) {
522         if (roworiented) {
523           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
524         } else {
525           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
526         }
527       }
528     }
529   }
530   PetscFunctionReturn(0);
531 }
532 
533 #define HASH_KEY 0.6180339887
534 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
535 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
536 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
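/* Multiplicative (Fibonacci) hashing: HASH_KEY is (sqrt(5)-1)/2 and the macro
   returns (int)(size*frac(key*HASH_KEY)). For example, HASH(11,7,tmp) computes
   tmp = 7*0.6180339887 = 4.3262..., fractional part 0.3262..., 11*0.3262... = 3.5...,
   so the hash value is 3. */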
537 #undef __FUNCT__
538 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
539 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
540 {
541   PetscFunctionBegin;
542   SETERRQ(1,"Function not yet written for SBAIJ format");
543   /* PetscFunctionReturn(0); */
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
548 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
549 {
550   PetscFunctionBegin;
551   SETERRQ(1,"Function not yet written for SBAIJ format");
552   /* PetscFunctionReturn(0); */
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPISBAIJ"
557 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
558 {
559   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
560   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
561   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
562 
563   PetscFunctionBegin;
564   for (i=0; i<m; i++) {
565     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
566     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
567     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
568       row = idxm[i] - bsrstart;
569       for (j=0; j<n; j++) {
570         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
571         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
572         if (idxn[j] >= bscstart && idxn[j] < bscend){
573           col = idxn[j] - bscstart;
574           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
575         } else {
576           if (!baij->colmap) {
577             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
578           }
579 #if defined (PETSC_USE_CTABLE)
580           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
581           data --;
582 #else
583           data = baij->colmap[idxn[j]/bs]-1;
584 #endif
585           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
586           else {
587             col  = data + idxn[j]%bs;
588             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatNorm_MPISBAIJ"
601 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
602 {
603   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
604   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
605   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
606   int        ierr;
607   PetscReal  sum[2],*lnorm2;
608 
609   PetscFunctionBegin;
610   if (baij->size == 1) {
611     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
612   } else {
613     if (type == NORM_FROBENIUS) {
614       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
615       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
616       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
617       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
618       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
619       /*
620       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
621       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
622       */
623       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
624       /*
625       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
626       PetscSynchronizedFlush(mat->comm); */
627 
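      /* each off-diagonal block is stored once (in B) but by symmetry contributes
         twice to the global Frobenius norm, hence the factor of 2 */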
628       *norm = sqrt(sum[0] + 2*sum[1]);
629       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
630     } else {
631       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
632     }
633   }
634   PetscFunctionReturn(0);
635 }
636 
637 /*
638   Creates and fills the hash table.
639   The table is created only once.
640   If new entries need to be added to the matrix
641   then the hash table has to be destroyed and
642   recreated.
643 */
644 #undef __FUNCT__
645 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
646 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
647 {
648   PetscFunctionBegin;
649   SETERRQ(1,"Function not yet written for SBAIJ format");
650   /* PetscFunctionReturn(0); */
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
655 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
658   int         ierr,nstash,reallocs;
659   InsertMode  addv;
660 
661   PetscFunctionBegin;
662   if (baij->donotstash) {
663     PetscFunctionReturn(0);
664   }
665 
666   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode;
       the bitwise OR over all ranks equals (ADD_VALUES|INSERT_VALUES) only when both modes were used */
667   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
668   if (addv == (ADD_VALUES|INSERT_VALUES)) {
669     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
670   }
671   mat->insertmode = addv; /* in case this processor had no cache */
672 
673   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
674   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
675   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
676   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
677   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
678   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
679   PetscFunctionReturn(0);
680 }
681 
682 #undef __FUNCT__
683 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
684 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
685 {
686   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
687   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
688   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
689   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
690   int         *row,*col,other_disassembled;
691   PetscTruth  r1,r2,r3;
692   MatScalar   *val;
693   InsertMode  addv = mat->insertmode;
694 
695   PetscFunctionBegin;
696 
697   if (!baij->donotstash) {
698     while (1) {
699       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
700       /*
701       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
702       PetscSynchronizedFlush(mat->comm);
703       */
704       if (!flg) break;
705 
706       for (i=0; i<n;) {
707         /* Now identify the consecutive vals belonging to the same row */
708         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
713         i = j;
714       }
715     }
716     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
717     /* Now process the block-stash. Since the values are stashed column-oriented,
718        set the roworiented flag to column oriented, and after MatSetValues()
719        restore the original flags */
720     r1 = baij->roworiented;
721     r2 = a->roworiented;
722     r3 = b->roworiented;
723     baij->roworiented = PETSC_FALSE;
724     a->roworiented    = PETSC_FALSE;
725     b->roworiented    = PETSC_FALSE;
726     while (1) {
727       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
728       if (!flg) break;
729 
730       for (i=0; i<n;) {
731         /* Now identify the consecutive vals belonging to the same row */
732         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
733         if (j < n) ncols = j-i;
734         else       ncols = n-i;
735         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
736         i = j;
737       }
738     }
739     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
740     baij->roworiented = r1;
741     a->roworiented    = r2;
742     b->roworiented    = r3;
743   }
744 
745   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
746   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
747 
748   /* determine if any processor has disassembled, if so we must
749      also disassemble ourselves, so that we may reassemble. */
750   /*
751      if nonzero structure of submatrix B cannot change then we know that
752      no processor disassembled thus we can skip this stuff
753   */
754   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
755     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
756     if (mat->was_assembled && !other_disassembled) {
757       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
758     }
759   }
760 
761   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
762     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
763   }
764   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
765   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
766 
767 #if defined(PETSC_USE_BOPT_g)
768   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
769     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
770     baij->ht_total_ct  = 0;
771     baij->ht_insert_ct = 0;
772   }
773 #endif
774   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
775     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
776     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
777     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
778   }
779 
780   if (baij->rowvalues) {
781     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
782     baij->rowvalues = 0;
783   }
784 
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
790 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
791 {
792   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
793   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
794   PetscTruth        isascii,isdraw;
795   PetscViewer       sviewer;
796   PetscViewerFormat format;
797 
798   PetscFunctionBegin;
799   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
800   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
801   if (isascii) {
802     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
803     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
804       MatInfo info;
805       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
806       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
807       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
808               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
809               baij->bs,(int)info.memory);CHKERRQ(ierr);
810       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
811       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
812       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
813       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
814       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
815       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
816       PetscFunctionReturn(0);
817     } else if (format == PETSC_VIEWER_ASCII_INFO) {
818       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
819       PetscFunctionReturn(0);
820     }
821   }
822 
823   if (isdraw) {
824     PetscDraw       draw;
825     PetscTruth isnull;
826     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
827     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
828   }
829 
830   if (size == 1) {
831     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
832     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
833   } else {
834     /* assemble the entire matrix onto first processor. */
835     Mat         A;
836     Mat_SeqSBAIJ *Aloc;
837     Mat_SeqBAIJ *Bloc;
838     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
839     MatScalar   *a;
840 
841     if (!rank) {
842       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
843     } else {
844       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
845     }
846     PetscLogObjectParent(mat,A);
847 
848     /* copy over the A part */
849     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
850     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
851     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
852 
853     for (i=0; i<mbs; i++) {
854       rvals[0] = bs*(baij->rstart + i);
855       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
856       for (j=ai[i]; j<ai[i+1]; j++) {
857         col = (baij->cstart+aj[j])*bs;
858         for (k=0; k<bs; k++) {
859           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
860           col++; a += bs;
861         }
862       }
863     }
864     /* copy over the B part */
865     Bloc = (Mat_SeqBAIJ*)baij->B->data;
866     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
867     for (i=0; i<mbs; i++) {
868       rvals[0] = bs*(baij->rstart + i);
869       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
870       for (j=ai[i]; j<ai[i+1]; j++) {
871         col = baij->garray[aj[j]]*bs;
872         for (k=0; k<bs; k++) {
873           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
874           col++; a += bs;
875         }
876       }
877     }
878     ierr = PetscFree(rvals);CHKERRQ(ierr);
879     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
880     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
881     /*
882        Everyone has to call to draw the matrix since the graphics waits are
883        synchronized across all processors that share the PetscDraw object
884     */
885     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
886     if (!rank) {
887       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
888       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
889     }
890     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
891     ierr = MatDestroy(A);CHKERRQ(ierr);
892   }
893   PetscFunctionReturn(0);
894 }
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "MatView_MPISBAIJ"
898 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
899 {
900   int        ierr;
901   PetscTruth isascii,isdraw,issocket,isbinary;
902 
903   PetscFunctionBegin;
904   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
905   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
906   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
907   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
908   if (isascii || isdraw || issocket || isbinary) {
909     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
910   } else {
911     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
912   }
913   PetscFunctionReturn(0);
914 }
915 
916 #undef __FUNCT__
917 #define __FUNCT__ "MatDestroy_MPISBAIJ"
918 int MatDestroy_MPISBAIJ(Mat mat)
919 {
920   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
921   int         ierr;
922 
923   PetscFunctionBegin;
924 #if defined(PETSC_USE_LOG)
925   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
926 #endif
927   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
928   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
929   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
930   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
931   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
932 #if defined (PETSC_USE_CTABLE)
933   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
934 #else
935   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
936 #endif
937   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
938   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
939   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
940   if (baij->slvec0) {
941     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
942     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
943   }
944   if (baij->slvec1) {
945     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
946     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
947     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
948   }
949   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
950   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
951   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
952   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
953 #if defined(PETSC_USE_MAT_SINGLE)
954   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
955 #endif
956   ierr = PetscFree(baij);CHKERRQ(ierr);
957   PetscFunctionReturn(0);
958 }
959 
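/*
   The one-scatter symmetric multiply below computes yy = M*x as follows
   (names as in the code):
     slvec1a = A_loc*x                  diagonal-block product
     slvec1b = 0
     slvec0  = [ x ; B^T*x ]            x copied in, plus the subdiagonal piece
     slvec1 += scatter-add of slvec0    one scatter moves x-ghosts and B^T*x parts
     yy      = B*slvec1b + slvec1a      adds the superdiagonal piece
   A single VecScatter (sMvctx) thus replaces the forward plus reverse scatters
   used by MatMult_MPISBAIJ_2comm() below.
*/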
960 #undef __FUNCT__
961 #define __FUNCT__ "MatMult_MPISBAIJ"
962 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
963 {
964   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
965   int         ierr,nt,mbs=a->mbs,bs=a->bs;
966   PetscScalar *x,*from,zero=0.0;
967 
968   PetscFunctionBegin;
969   /*
970   PetscSynchronizedPrintf(A->comm," _1comm is called ...\n");
971   PetscSynchronizedFlush(A->comm);
972   */
973   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
974   if (nt != A->n) {
975     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
976   }
977   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
978   if (nt != A->m) {
979     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
980   }
981 
982   /* diagonal part */
983   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
984   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
985 
986   /* subdiagonal part */
987   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
988 
989   /* copy x into the vec slvec0 */
990   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
991   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
992   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
993   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
994 
995   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
996   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
997   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
998 
999   /* superdiagonal part */
1000   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1001 
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1007 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1008 {
1009   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1010   int         ierr,nt;
1011 
1012   PetscFunctionBegin;
1013   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1014   if (nt != A->n) {
1015     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1016   }
1017   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1018   if (nt != A->m) {
1019     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1020   }
1021 
1022   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1023   /* do diagonal part */
1024   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1025   /* do superdiagonal part */
1026   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1027   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1028   /* do subdiagonal part */
1029   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1030   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1031   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1032 
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 #undef __FUNCT__
1037 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1038 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1039 {
1040   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1041   int          ierr,mbs=a->mbs,bs=a->bs;
1042   PetscScalar  *x,*from,zero=0.0;
1043 
1044   PetscFunctionBegin;
1045   /*
1046   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1047   PetscSynchronizedFlush(A->comm);
1048   */
1049   /* diagonal part */
1050   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1051   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1052 
1053   /* subdiagonal part */
1054   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1055 
1056   /* copy x into the vec slvec0 */
1057   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1058   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1059   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1060   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1061 
1062   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1063   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1064   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1065 
1066   /* superdiagonal part */
1067   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1068 
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1074 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1075 {
1076   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1077   int        ierr;
1078 
1079   PetscFunctionBegin;
1080   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1081   /* do diagonal part */
1082   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1083   /* do superdiagonal part */
1084   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1085   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1086 
1087   /* do subdiagonal part */
1088   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1089   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1090   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1091 
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1097 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1098 {
1099   PetscFunctionBegin;
1100   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
1101   /* PetscFunctionReturn(0); */
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1106 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1107 {
1108   PetscFunctionBegin;
1109   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
1110   /* PetscFunctionReturn(0); */
1111 }
1112 
1113 /*
1114   This only works correctly for square matrices where the subblock A->A is the
1115    diagonal block
1116 */
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1119 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1120 {
1121   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1122   int         ierr;
1123 
1124   PetscFunctionBegin;
1125   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1126   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatScale_MPISBAIJ"
1132 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1133 {
1134   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1135   int         ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1139   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1145 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1146 {
1147   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1148   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1149   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1150   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1151   int            *cmap,*idx_p,cstart = mat->cstart;
1152 
1153   PetscFunctionBegin;
1154   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1155   mat->getrowactive = PETSC_TRUE;
1156 
1157   if (!mat->rowvalues && (idx || v)) {
1158     /*
1159         allocate enough space to hold information from the longest row.
1160     */
1161     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1162     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1163     int     max = 1,mbs = mat->mbs,tmp;
1164     for (i=0; i<mbs; i++) {
1165       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1166       if (max < tmp) { max = tmp; }
1167     }
1168     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1169     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1170   }
1171 
1172   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1173   lrow = row - brstart;  /* local row index */
1174 
1175   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1176   if (!v)   {pvA = 0; pvB = 0;}
1177   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1178   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1179   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1180   nztot = nzA + nzB;
1181 
1182   cmap  = mat->garray;
1183   if (v  || idx) {
1184     if (nztot) {
1185       /* Sort by increasing column numbers, assuming A and B already sorted */
1186       int imark = -1;
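      /* imark counts the leading B-part entries whose global block column lies to
         the left of the diagonal block; they precede A's entries in the merged row */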
1187       if (v) {
1188         *v = v_p = mat->rowvalues;
1189         for (i=0; i<nzB; i++) {
1190           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1191           else break;
1192         }
1193         imark = i;
1194         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1195         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1196       }
1197       if (idx) {
1198         *idx = idx_p = mat->rowindices;
1199         if (imark > -1) {
1200           for (i=0; i<imark; i++) {
1201             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1202           }
1203         } else {
1204           for (i=0; i<nzB; i++) {
1205             if (cmap[cworkB[i]/bs] < cstart)
1206               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1207             else break;
1208           }
1209           imark = i;
1210         }
1211         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1212         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1213       }
1214     } else {
1215       if (idx) *idx = 0;
1216       if (v)   *v   = 0;
1217     }
1218   }
1219   *nz = nztot;
1220   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1221   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 #undef __FUNCT__
1226 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1227 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1228 {
1229   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1230 
1231   PetscFunctionBegin;
1232   if (baij->getrowactive == PETSC_FALSE) {
1233     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1234   }
1235   baij->getrowactive = PETSC_FALSE;
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1241 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1242 {
1243   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1244 
1245   PetscFunctionBegin;
1246   *bs = baij->bs;
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1252 int MatZeroEntries_MPISBAIJ(Mat A)
1253 {
1254   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1255   int         ierr;
1256 
1257   PetscFunctionBegin;
1258   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1259   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1265 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1266 {
1267   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1268   Mat         A = a->A,B = a->B;
1269   int         ierr;
1270   PetscReal   isend[5],irecv[5];
1271 
1272   PetscFunctionBegin;
1273   info->block_size     = (PetscReal)a->bs;
1274   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1275   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1276   isend[3] = info->memory;  isend[4] = info->mallocs;
1277   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1278   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1279   isend[3] += info->memory;  isend[4] += info->mallocs;
1280   if (flag == MAT_LOCAL) {
1281     info->nz_used      = isend[0];
1282     info->nz_allocated = isend[1];
1283     info->nz_unneeded  = isend[2];
1284     info->memory       = isend[3];
1285     info->mallocs      = isend[4];
1286   } else if (flag == MAT_GLOBAL_MAX) {
1287     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1288     info->nz_used      = irecv[0];
1289     info->nz_allocated = irecv[1];
1290     info->nz_unneeded  = irecv[2];
1291     info->memory       = irecv[3];
1292     info->mallocs      = irecv[4];
1293   } else if (flag == MAT_GLOBAL_SUM) {
1294     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1295     info->nz_used      = irecv[0];
1296     info->nz_allocated = irecv[1];
1297     info->nz_unneeded  = irecv[2];
1298     info->memory       = irecv[3];
1299     info->mallocs      = irecv[4];
1300   } else {
1301     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1302   }
1303   info->rows_global       = (PetscReal)matin->M;
1304   info->columns_global    = (PetscReal)matin->N;
1305   info->rows_local        = (PetscReal)matin->m;
1306   info->columns_local     = (PetscReal)matin->N;
1307   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1308   info->fill_ratio_needed = 0;
1309   info->factor_mallocs    = 0;
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1315 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1316 {
1317   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1318   int         ierr;
1319 
1320   PetscFunctionBegin;
1321   switch (op) {
1322   case MAT_NO_NEW_NONZERO_LOCATIONS:
1323   case MAT_YES_NEW_NONZERO_LOCATIONS:
1324   case MAT_COLUMNS_UNSORTED:
1325   case MAT_COLUMNS_SORTED:
1326   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1327   case MAT_KEEP_ZEROED_ROWS:
1328   case MAT_NEW_NONZERO_LOCATION_ERR:
1329     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1330     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1331     break;
1332   case MAT_ROW_ORIENTED:
1333     a->roworiented = PETSC_TRUE;
1334     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1335     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1336     break;
1337   case MAT_ROWS_SORTED:
1338   case MAT_ROWS_UNSORTED:
1339   case MAT_YES_NEW_DIAGONALS:
1340     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1341     break;
1342   case MAT_COLUMN_ORIENTED:
1343     a->roworiented = PETSC_FALSE;
1344     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1345     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1346     break;
1347   case MAT_IGNORE_OFF_PROC_ENTRIES:
1348     a->donotstash = PETSC_TRUE;
1349     break;
1350   case MAT_NO_NEW_DIAGONALS:
1351     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1352   case MAT_USE_HASH_TABLE:
1353     a->ht_flag = PETSC_TRUE;
1354     break;
1355   default:
1356     SETERRQ(PETSC_ERR_SUP,"unknown option");
1357   }
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 #undef __FUNCT__
1362 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1363 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1364 {
1365   int ierr;
1366   PetscFunctionBegin;
1367   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1373 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1374 {
1375   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1376   Mat         a = baij->A,b = baij->B;
1377   int         ierr,s1,s2,s3;
1378 
1379   PetscFunctionBegin;
1380   if (ll != rr) {
1381     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1382   }
1383   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1384   if (rr) {
1385     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1386     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1387     /* Overlap communication with computation. */
1388     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1389     /* ll == rr (checked above), so verify and apply the left scaling as well */
1390     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1391     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1392     /* left scale the off-diagonal block */
1393     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1394 
1395     /* scale the diagonal block */
1396     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1397 
1398     /* complete the scatter, then right scale the off-diagonal block */
1399     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1400     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1401   }
1402 
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1408 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1409 {
1410   PetscFunctionBegin;
1411   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1416 int MatPrintHelp_MPISBAIJ(Mat A)
1417 {
1418   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1419   MPI_Comm    comm = A->comm;
1420   static int  called = 0;
1421   int         ierr;
1422 
1423   PetscFunctionBegin;
1424   if (!a->rank) {
1425     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1426   }
1427   if (called) {PetscFunctionReturn(0);} else called = 1;
1428   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1429   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1435 int MatSetUnfactored_MPISBAIJ(Mat A)
1436 {
1437   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1438   int         ierr;
1439 
1440   PetscFunctionBegin;
1441   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1446 
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatEqual_MPISBAIJ"
1449 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1450 {
1451   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1452   Mat         a,b,c,d;
1453   PetscTruth  flg;
1454   int         ierr;
1455 
1456   PetscFunctionBegin;
1457   a = matA->A; b = matA->B;
1458   c = matB->A; d = matB->B;
1459 
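  /* the matrices are equal only if both the diagonal and off-diagonal parts agree
     on every process, so the local results are combined with a logical AND */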
1460   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1461   if (flg == PETSC_TRUE) {
1462     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1463   }
1464   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1470 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1471 {
1472   int        ierr;
1473 
1474   PetscFunctionBegin;
1475   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1476   PetscFunctionReturn(0);
1477 }
1478 /* -------------------------------------------------------------------*/
1479 static struct _MatOps MatOps_Values = {
1480        MatSetValues_MPISBAIJ,
1481        MatGetRow_MPISBAIJ,
1482        MatRestoreRow_MPISBAIJ,
1483        MatMult_MPISBAIJ,
1484 /* 4*/ MatMultAdd_MPISBAIJ,
1485        MatMultTranspose_MPISBAIJ,
1486        MatMultTransposeAdd_MPISBAIJ,
1487        0,
1488        0,
1489        0,
1490 /*10*/ 0,
1491        0,
1492        0,
1493        MatRelax_MPISBAIJ,
1494        MatTranspose_MPISBAIJ,
1495 /*15*/ MatGetInfo_MPISBAIJ,
1496        MatEqual_MPISBAIJ,
1497        MatGetDiagonal_MPISBAIJ,
1498        MatDiagonalScale_MPISBAIJ,
1499        MatNorm_MPISBAIJ,
1500 /*20*/ MatAssemblyBegin_MPISBAIJ,
1501        MatAssemblyEnd_MPISBAIJ,
1502        0,
1503        MatSetOption_MPISBAIJ,
1504        MatZeroEntries_MPISBAIJ,
1505 /*25*/ MatZeroRows_MPISBAIJ,
1506        0,
1507        0,
1508        0,
1509        0,
1510 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1511        0,
1512        0,
1513        0,
1514        0,
1515 /*35*/ MatDuplicate_MPISBAIJ,
1516        0,
1517        0,
1518        0,
1519        0,
1520 /*40*/ 0,
1521        0,
1522        0,
1523        MatGetValues_MPISBAIJ,
1524        0,
1525 /*45*/ MatPrintHelp_MPISBAIJ,
1526        MatScale_MPISBAIJ,
1527        0,
1528        0,
1529        0,
1530 /*50*/ MatGetBlockSize_MPISBAIJ,
1531        0,
1532        0,
1533        0,
1534        0,
1535 /*55*/ 0,
1536        0,
1537        MatSetUnfactored_MPISBAIJ,
1538        0,
1539        MatSetValuesBlocked_MPISBAIJ,
1540 /*60*/ 0,
1541        0,
1542        0,
1543        MatGetPetscMaps_Petsc,
1544        0,
1545 /*65*/ 0,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*70*/ MatGetRowMax_MPISBAIJ,
1551        0,
1552        0,
1553        0,
1554        0,
1555 /*75*/ 0,
1556        0,
1557        0,
1558        0,
1559        0,
1560 /*80*/ 0,
1561        0,
1562        0,
1563        0,
1564        0,
1565 /*85*/ MatLoad_MPISBAIJ
1566 };
1567 
1568 
1569 EXTERN_C_BEGIN
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1572 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1573 {
1574   PetscFunctionBegin;
1575   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1576   *iscopy = PETSC_FALSE;
1577   PetscFunctionReturn(0);
1578 }
1579 EXTERN_C_END
1580 
1581 EXTERN_C_BEGIN
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1584 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1585 {
1586   Mat_MPISBAIJ *b;
1587   int          ierr,i,mbs,Mbs;
1588 
1589   PetscFunctionBegin;
1590   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1591 
1592   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1593   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1594   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1595   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1596   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1597   if (d_nnz) {
1598     for (i=0; i<B->m/bs; i++) {
1599       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1600     }
1601   }
1602   if (o_nnz) {
1603     for (i=0; i<B->m/bs; i++) {
1604       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1605     }
1606   }
1607   B->preallocated = PETSC_TRUE;
1608   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1609   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1610   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1611   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1612 
1613   b   = (Mat_MPISBAIJ*)B->data;
1614   mbs = B->m/bs;
1615   Mbs = B->M/bs;
1616   if (mbs*bs != B->m) {
1617     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by blocksize %d",B->m,bs);
1618   }
1619 
1620   b->bs  = bs;
1621   b->bs2 = bs*bs;
1622   b->mbs = mbs;
1623   b->nbs = mbs;
1624   b->Mbs = Mbs;
1625   b->Nbs = Mbs;
1626 
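  /* gather every process's block-row count and prefix-sum it, so that block rows
     rowners[rank] .. rowners[rank+1]-1 belong to 'rank'; rowners_bs records the
     same ranges in point (unblocked) rows */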
1627   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1628   b->rowners[0]    = 0;
1629   for (i=2; i<=b->size; i++) {
1630     b->rowners[i] += b->rowners[i-1];
1631   }
1632   b->rstart    = b->rowners[b->rank];
1633   b->rend      = b->rowners[b->rank+1];
1634   b->cstart    = b->rstart;
1635   b->cend      = b->rend;
1636   for (i=0; i<=b->size; i++) {
1637     b->rowners_bs[i] = b->rowners[i]*bs;
1638   }
1639   b->rstart_bs = b-> rstart*bs;
1640   b->rend_bs   = b->rend*bs;
1641 
1642   b->cstart_bs = b->cstart*bs;
1643   b->cend_bs   = b->cend*bs;
1644 
1645 
1646   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1647   PetscLogObjectParent(B,b->A);
1648   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1649   PetscLogObjectParent(B,b->B);
1650 
1651   /* build cache for off array entries formed */
1652   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1653 
1654   PetscFunctionReturn(0);
1655 }
1656 EXTERN_C_END
1657 
1658 /*MC
1659    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1660    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1661 
1662    Options Database Keys:
1663 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1664 
1665   Level: beginner
1666 
1667 .seealso: MatCreateMPISBAIJ
1668 M*/
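
/*
   For illustration, a minimal sketch (not part of this source; the sizes and
   communicator are hypothetical) of selecting this type at runtime:

.vb
      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);   (or -mat_type mpisbaij with MatSetFromOptions())
      ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
*/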
1669 
1670 EXTERN_C_BEGIN
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatCreate_MPISBAIJ"
1673 int MatCreate_MPISBAIJ(Mat B)
1674 {
1675   Mat_MPISBAIJ *b;
1676   int          ierr;
1677   PetscTruth   flg;
1678 
1679   PetscFunctionBegin;
1680 
1681   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1682   B->data = (void*)b;
1683   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1684   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1685 
1686   B->ops->destroy    = MatDestroy_MPISBAIJ;
1687   B->ops->view       = MatView_MPISBAIJ;
1688   B->mapping    = 0;
1689   B->factor     = 0;
1690   B->assembled  = PETSC_FALSE;
1691 
1692   B->insertmode = NOT_SET_VALUES;
1693   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1694   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1695 
1696   /* build local table of row and column ownerships */
1697   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1698   b->cowners    = b->rowners + b->size + 2;
1699   b->rowners_bs = b->cowners + b->size + 2;
1700   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1701 
1702   /* build cache for off array entries formed */
1703   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1704   b->donotstash  = PETSC_FALSE;
1705   b->colmap      = PETSC_NULL;
1706   b->garray      = PETSC_NULL;
1707   b->roworiented = PETSC_TRUE;
1708 
1709 #if defined(PETSC_USE_MAT_SINGLE)
1710   /* stuff for MatSetValues_XXX in single precision */
1711   b->setvalueslen     = 0;
1712   b->setvaluescopy    = PETSC_NULL;
1713 #endif
1714 
1715   /* stuff used in block assembly */
1716   b->barray       = 0;
1717 
1718   /* stuff used for matrix vector multiply */
1719   b->lvec         = 0;
1720   b->Mvctx        = 0;
1721   b->slvec0       = 0;
1722   b->slvec0b      = 0;
1723   b->slvec1       = 0;
1724   b->slvec1a      = 0;
1725   b->slvec1b      = 0;
1726   b->sMvctx       = 0;
1727 
1728   /* stuff for MatGetRow() */
1729   b->rowindices   = 0;
1730   b->rowvalues    = 0;
1731   b->getrowactive = PETSC_FALSE;
1732 
1733   /* hash table stuff */
1734   b->ht           = 0;
1735   b->hd           = 0;
1736   b->ht_size      = 0;
1737   b->ht_flag      = PETSC_FALSE;
1738   b->ht_fact      = 0;
1739   b->ht_total_ct  = 0;
1740   b->ht_insert_ct = 0;
1741 
1742   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1743   if (flg) {
1744     PetscReal fact = 1.39;
1745     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1746     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1747     if (fact <= 1.0) fact = 1.39;
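    /* note: this reuses the MPIBAIJ hash-table hook; an SBAIJ-specific
       MatMPISBAIJSetHashTableFactor() has not been written yet and currently
       raises an error (see below) */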
1748     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1749     PetscLogInfo(0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact);
1750   }
1751   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1752                                      "MatStoreValues_MPISBAIJ",
1753                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1754   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1755                                      "MatRetrieveValues_MPISBAIJ",
1756                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1757   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1758                                      "MatGetDiagonalBlock_MPISBAIJ",
1759                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1760   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1761                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1762                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1763   PetscFunctionReturn(0);
1764 }
1765 EXTERN_C_END
1766 
1767 /*MC
1768    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1769 
1770    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1771    and MATMPISBAIJ otherwise.
1772 
1773    Options Database Keys:
1774 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1775 
1776   Level: beginner
1777 
1778 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1779 M*/
1780 
1781 EXTERN_C_BEGIN
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatCreate_SBAIJ"
1784 int MatCreate_SBAIJ(Mat A) {
1785   int ierr,size;
1786 
1787   PetscFunctionBegin;
1788   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1789   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1790   if (size == 1) {
1791     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1792   } else {
1793     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1794   }
1795   PetscFunctionReturn(0);
1796 }
1797 EXTERN_C_END
1798 
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1801 /*@C
1802    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1803    the user should preallocate the matrix storage by setting the parameters
1804    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1805    performance can be increased by more than a factor of 50.
1806 
1807    Collective on Mat
1808 
1809    Input Parameters:
1810 +  A - the matrix
1811 .  bs   - size of block
1812 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1813            submatrix  (same for all local rows)
1814 .  d_nnz - array containing the number of block nonzeros in the various block rows
1815            in the upper triangular and diagonal part of the diagonal portion of the
1816            local submatrix (possibly different for each block row), or PETSC_NULL.
1817            You must leave room for the diagonal entry even if it is zero.
1818 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1819            submatrix (same for all local rows).
1820 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1821            off-diagonal portion of the local submatrix (possibly different for
1822            each block row) or PETSC_NULL.
1823 
1824 
1825    Options Database Keys:
1826 +   -mat_no_unroll - uses code that does not unroll the loops in the
1827                      block calculations (much slower)
1828 -   -mat_block_size - size of the blocks to use
1829 
1830    Notes:
1831 
1832    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
1833    then it must be used on all processors that share the object for that argument.
1834 
1835    Storage Information:
1836    For a square global matrix we define each processor's diagonal portion
1837    to be its local rows and the corresponding columns (a square submatrix);
1838    each processor's off-diagonal portion encompasses the remainder of the
1839    local matrix (a rectangular submatrix).
1840 
1841    The user can specify preallocated storage for the diagonal part of
1842    the local submatrix with either d_nz or d_nnz (not both).  Set
1843    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1844    memory allocation.  Likewise, specify preallocated storage for the
1845    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1846 
1847    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1848    the figure below we depict these three local rows and all columns (0-11).
1849 
1850 .vb
1851            0 1 2 3 4 5 6 7 8 9 10 11
1852           -------------------
1853    row 3  |  o o o d d d o o o o o o
1854    row 4  |  o o o d d d o o o o o o
1855    row 5  |  o o o d d d o o o o o o
1856           -------------------
1857 .ve
1858 
1859    Thus, any entries in the d locations are stored in the d (diagonal)
1860    submatrix, and any entries in the o locations are stored in the
1861    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1862    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1863 
1864    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1865    plus the diagonal part of the d matrix,
1866    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1867    In general, for PDE problems in which most nonzeros are near the diagonal,
1868    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1869    or you will get TERRIBLE performance; see the users' manual chapter on
1870    matrices.
1871 
1872    Level: intermediate
1873 
1874 .keywords: matrix, block, aij, compressed row, sparse, parallel
1875 
1876 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1877 @*/
1878 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1879 {
1880   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1881 
1882   PetscFunctionBegin;
1883   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1884   if (f) {
1885     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1886   }
1887   PetscFunctionReturn(0);
1888 }
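
/*
   For illustration, a sketch (hypothetical values, bs = 1) of preallocating the
   three local rows depicted in the figure above.  Since only the upper triangle
   is stored, row 3 keeps columns 3-5 in the d block, row 4 keeps columns 4-5,
   row 5 keeps only column 5, and every row keeps columns 6-11 in the o block:

.vb
      int d_nnz[] = {3,2,1};
      int o_nnz[] = {6,6,6};
      ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
*/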
1889 
1890 #undef __FUNCT__
1891 #define __FUNCT__ "MatCreateMPISBAIJ"
1892 /*@C
1893    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1894    (block compressed row).  For good matrix assembly performance
1895    the user should preallocate the matrix storage by setting the parameters
1896    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1897    performance can be increased by more than a factor of 50.
1898 
1899    Collective on MPI_Comm
1900 
1901    Input Parameters:
1902 +  comm - MPI communicator
1903 .  bs   - size of block
1904 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1905            This value should be the same as the local size used in creating the
1906            y vector for the matrix-vector product y = Ax.
1907 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1908            This value should be the same as the local size used in creating the
1909            x vector for the matrix-vector product y = Ax.
1910 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1911 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1912 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1913            submatrix  (same for all local rows)
1914 .  d_nnz - array containing the number of block nonzeros in the various block rows
1915            in the upper triangular portion of the diagonal portion of the local submatrix
1916            (possibly different for each block row) or PETSC_NULL.
1917            You must leave room for the diagonal entry even if it is zero.
1918 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1919            submatrix (same for all local rows).
1920 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1921            off-diagonal portion of the local submatrix (possibly different for
1922            each block row) or PETSC_NULL.
1923 
1924    Output Parameter:
1925 .  A - the matrix
1926 
1927    Options Database Keys:
1928 +   -mat_no_unroll - uses code that does not unroll the loops in the
1929                      block calculations (much slower)
1930 .   -mat_block_size - size of the blocks to use
1931 -   -mat_mpi - use the parallel matrix data structures even on one processor
1932                (defaults to using SeqSBAIJ format on one processor)
1933 
1934    Notes:
1935    The user MUST specify either the local or global matrix dimensions
1936    (possibly both).
1937 
1938    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
1939    then it must be used on all processors that share the object for that argument.
1940 
1941    Storage Information:
1942    For a square global matrix we define each processor's diagonal portion
1943    to be its local rows and the corresponding columns (a square submatrix);
1944    each processor's off-diagonal portion encompasses the remainder of the
1945    local matrix (a rectangular submatrix).
1946 
1947    The user can specify preallocated storage for the diagonal part of
1948    the local submatrix with either d_nz or d_nnz (not both).  Set
1949    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1950    memory allocation.  Likewise, specify preallocated storage for the
1951    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1952 
1953    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1954    the figure below we depict these three local rows and all columns (0-11).
1955 
1956 .vb
1957            0 1 2 3 4 5 6 7 8 9 10 11
1958           -------------------
1959    row 3  |  o o o d d d o o o o o o
1960    row 4  |  o o o d d d o o o o o o
1961    row 5  |  o o o d d d o o o o o o
1962           -------------------
1963 .ve
1964 
1965    Thus, any entries in the d locations are stored in the d (diagonal)
1966    submatrix, and any entries in the o locations are stored in the
1967    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1968    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1969 
1970    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1971    plus the diagonal part of the d matrix,
1972    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1973    In general, for PDE problems in which most nonzeros are near the diagonal,
1974    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1975    or you will get TERRIBLE performance; see the users' manual chapter on
1976    matrices.
1977 
1978    Level: intermediate
1979 
1980 .keywords: matrix, block, aij, compressed row, sparse, parallel
1981 
1982 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1983 @*/
1984 
1985 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
1986 {
1987   int ierr,size;
1988 
1989   PetscFunctionBegin;
1990   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1991   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1992   if (size > 1) {
1993     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1994     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1995   } else {
1996     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1997     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1998   }
1999   PetscFunctionReturn(0);
2000 }
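
/*
   For illustration, a sketch (hypothetical sizes) of creating a distributed
   symmetric matrix with 2x2 blocks and uniform preallocation estimates:

.vb
      Mat A;
      ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                               5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
*/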
2001 
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2005 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2006 {
2007   Mat          mat;
2008   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2009   int          ierr,len=0;
2010 
2011   PetscFunctionBegin;
2012   *newmat       = 0;
2013   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2014   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
2015   mat->preallocated = PETSC_TRUE;
2016   a = (Mat_MPISBAIJ*)mat->data;
2017   a->bs  = oldmat->bs;
2018   a->bs2 = oldmat->bs2;
2019   a->mbs = oldmat->mbs;
2020   a->nbs = oldmat->nbs;
2021   a->Mbs = oldmat->Mbs;
2022   a->Nbs = oldmat->Nbs;
2023 
2024   a->rstart       = oldmat->rstart;
2025   a->rend         = oldmat->rend;
2026   a->cstart       = oldmat->cstart;
2027   a->cend         = oldmat->cend;
2028   a->size         = oldmat->size;
2029   a->rank         = oldmat->rank;
2030   a->donotstash   = oldmat->donotstash;
2031   a->roworiented  = oldmat->roworiented;
2032   a->rowindices   = 0;
2033   a->rowvalues    = 0;
2034   a->getrowactive = PETSC_FALSE;
2035   a->barray       = 0;
2036   a->rstart_bs    = oldmat->rstart_bs;
2037   a->rend_bs      = oldmat->rend_bs;
2038   a->cstart_bs    = oldmat->cstart_bs;
2039   a->cend_bs      = oldmat->cend_bs;
2040 
2041   /* hash table stuff */
2042   a->ht           = 0;
2043   a->hd           = 0;
2044   a->ht_size      = 0;
2045   a->ht_flag      = oldmat->ht_flag;
2046   a->ht_fact      = oldmat->ht_fact;
2047   a->ht_total_ct  = 0;
2048   a->ht_insert_ct = 0;
2049 
2050   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
2051   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2052   a->cowners    = a->rowners + a->size + 2;
2053   a->rowners_bs = a->cowners + a->size + 2;
2054   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2055   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2056   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2057   if (oldmat->colmap) {
2058 #if defined (PETSC_USE_CTABLE)
2059     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2060 #else
2061     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2062     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2063     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2064 #endif
2065   } else a->colmap = 0;
2066   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2067     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2068     PetscLogObjectMemory(mat,len*sizeof(int));
2069     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2070   } else a->garray = 0;
2071 
2072   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2073   PetscLogObjectParent(mat,a->lvec);
2074   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2075 
2076   PetscLogObjectParent(mat,a->Mvctx);
2077   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2078   PetscLogObjectParent(mat,a->A);
2079   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2080   PetscLogObjectParent(mat,a->B);
2081   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); /* copy the composed functions to the new matrix */
2082   *newmat = mat;
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 #include "petscsys.h"
2087 
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatLoad_MPISBAIJ"
2090 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
2091 {
2092   Mat          A;
2093   int          i,nz,ierr,j,rstart,rend,fd;
2094   PetscScalar  *vals,*buf;
2095   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2096   MPI_Status   status;
2097   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2098   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2099   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2100   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2101   int          dcount,kmax,k,nzcount,tmp;
2102 
2103   PetscFunctionBegin;
2104   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2105 
2106   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2107   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2108   if (!rank) {
2109     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2110     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2111     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2112     if (header[3] < 0) {
2113       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2114     }
2115   }
2116 
2117   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2118   M = header[1]; N = header[2];
2119 
2120   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2121 
2122   /*
2123      This code adds extra rows to make sure the number of rows is
2124      divisible by the blocksize
2125   */
2126   Mbs        = M/bs;
2127   extra_rows = bs - M + bs*(Mbs);
2128   if (extra_rows == bs) extra_rows = 0;
2129   else                  Mbs++;
2130   if (extra_rows && !rank) {
2131     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2132   }
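
  /* worked example (hypothetical sizes): M = 10, bs = 4 gives Mbs = 2 and
     extra_rows = 4 - 10 + 8 = 2, so Mbs becomes 3 and two padding rows (with
     unit diagonal entries) are appended to reach 12 = 3*4 rows */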
2133 
2134   /* determine ownership of all rows */
2135   mbs        = Mbs/size + ((Mbs % size) > rank);
2136   m          = mbs*bs;
2137   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2138   browners   = rowners + size + 1;
2139   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2140   rowners[0] = 0;
2141   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2142   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2143   rstart = rowners[rank];
2144   rend   = rowners[rank+1];
2145 
2146   /* distribute row lengths to all processors */
2147   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2148   if (!rank) {
2149     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2150     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2151     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2152     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2153     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2154     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2155     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2156   } else {
2157     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2158   }
2159 
2160   if (!rank) {   /* procs[0] */
2161     /* calculate the number of nonzeros on each processor */
2162     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2163     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2164     for (i=0; i<size; i++) {
2165       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2166         procsnz[i] += rowlengths[j];
2167       }
2168     }
2169     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2170 
2171     /* determine max buffer needed and allocate it */
2172     maxnz = 0;
2173     for (i=0; i<size; i++) {
2174       maxnz = PetscMax(maxnz,procsnz[i]);
2175     }
2176     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2177 
2178     /* read in my part of the matrix column indices  */
2179     nz     = procsnz[0];
2180     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2181     mycols = ibuf;
2182     if (size == 1)  nz -= extra_rows;
2183     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2184     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2185 
2186     /* read in everyone else's column indices (except the last proc's) and ship them off */
2187     for (i=1; i<size-1; i++) {
2188       nz   = procsnz[i];
2189       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2190       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2191     }
2192     /* read in the last proc's rows and ship them off */
2193     if (size != 1) {
2194       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2195       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2196       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2197       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2198     }
2199     ierr = PetscFree(cols);CHKERRQ(ierr);
2200   } else {  /* procs[i], i>0 */
2201     /* determine buffer space needed for message */
2202     nz = 0;
2203     for (i=0; i<m; i++) {
2204       nz += locrowlens[i];
2205     }
2206     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2207     mycols = ibuf;
2208     /* receive message of column indices*/
2209     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2210     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2211     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2212   }
2213 
2214   /* loop over local rows, determining number of off diagonal entries */
2215   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2216   odlens   = dlens + (rend-rstart);
2217   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2218   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2219   masked1  = mask    + Mbs;
2220   masked2  = masked1 + Mbs;
2221   rowcount = 0; nzcount = 0;
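  /* for each local block row, mark each referenced block column exactly once in
     mask[], classifying it as a diagonal-portion column (masked1) or an
     off-diagonal column (masked2); the resulting counts become the
     preallocation arrays dlens[] and odlens[] */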
2222   for (i=0; i<mbs; i++) {
2223     dcount  = 0;
2224     odcount = 0;
2225     for (j=0; j<bs; j++) {
2226       kmax = locrowlens[rowcount];
2227       for (k=0; k<kmax; k++) {
2228         tmp = mycols[nzcount++]/bs; /* block col. index */
2229         if (!mask[tmp]) {
2230           mask[tmp] = 1;
2231           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2232           else masked1[dcount++] = tmp; /* entry in diag portion */
2233         }
2234       }
2235       rowcount++;
2236     }
2237 
2238     dlens[i]  = dcount;  /* d_nzz[i] */
2239     odlens[i] = odcount; /* o_nzz[i] */
2240 
2241     /* zero out the mask elements we set */
2242     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2243     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2244   }
2245 
2246   /* create our matrix */
2247   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2248   ierr = MatSetType(A,type);CHKERRQ(ierr);
2249   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2250   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2251 
2252   if (!rank) {
2253     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2254     /* read in my part of the matrix numerical values  */
2255     nz = procsnz[0];
2256     vals = buf;
2257     mycols = ibuf;
2258     if (size == 1)  nz -= extra_rows;
2259     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2260     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2261 
2262     /* insert into matrix */
2263     jj      = rstart*bs;
2264     for (i=0; i<m; i++) {
2265       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2266       mycols += locrowlens[i];
2267       vals   += locrowlens[i];
2268       jj++;
2269     }
2270 
2271     /* read in the other processors' values (except the last one's) and ship them out */
2272     for (i=1; i<size-1; i++) {
2273       nz   = procsnz[i];
2274       vals = buf;
2275       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2276       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2277     }
2278     /* the last proc */
2279     if (size != 1) {
2280       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on disk */
2281       vals = buf;
2282       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2283       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2284       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2285     }
2286     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2287 
2288   } else {
2289     /* receive numeric values */
2290     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2291 
2292     /* receive message of values*/
2293     vals   = buf;
2294     mycols = ibuf;
2295     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2296     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2297     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2298 
2299     /* insert into matrix */
2300     jj      = rstart*bs;
2301     for (i=0; i<m; i++) {
2302       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2303       mycols += locrowlens[i];
2304       vals   += locrowlens[i];
2305       jj++;
2306     }
2307   }
2308 
2309   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2310   ierr = PetscFree(buf);CHKERRQ(ierr);
2311   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2312   ierr = PetscFree(rowners);CHKERRQ(ierr);
2313   ierr = PetscFree(dlens);CHKERRQ(ierr);
2314   ierr = PetscFree(mask);CHKERRQ(ierr);
2315   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2316   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2317   *newmat = A;
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2323 /*@
2324    MatMPISBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
2325 
2326    Input Parameters:
2327 .  mat  - the matrix
2328 .  fact - factor
2329 
2330    Collective on Mat
2331 
2332    Level: advanced
2333 
2334   Notes:
2335    This can also be set by the command line option: -mat_use_hash_table fact
2336 
2337 .keywords: matrix, hashtable, factor, HT
2338 
2339 .seealso: MatSetOption()
2340 @*/
2341 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2342 {
2343   PetscFunctionBegin;
2344   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
2345   /* PetscFunctionReturn(0); */
2346 }
2347 
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2350 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2351 {
2352   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2353   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2354   PetscReal    atmp;
2355   PetscReal    *work,*svalues,*rvalues;
2356   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2357   int          rank,size,*rowners_bs,dest,count,source;
2358   PetscScalar  *va;
2359   MatScalar    *ba;
2360   MPI_Status   stat;
2361 
2362   PetscFunctionBegin;
2363   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2364   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2365 
2366   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2367   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2368 
2369   bs   = a->bs;
2370   mbs  = a->mbs;
2371   Mbs  = a->Mbs;
2372   ba   = b->a;
2373   bi   = b->i;
2374   bj   = b->j;
2379 
2380   /* find ownerships */
2381   rowners_bs = a->rowners_bs;
2387 
2388   /* each proc creates an array to be distributed */
2389   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2390   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2391 
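  /* since only the upper triangle is stored, an entry of B in a local row also
     represents the symmetric entry in a row owned by a higher rank: each process
     folds |B| entries into its own row maxima (va) and accumulates the maxima
     for the remote rows in work[], then ships the slices to their owners */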
2392   /* row_max for B */
2393   if (rank != size-1){
2394     for (i=0; i<mbs; i++) {
2395       ncols = bi[1] - bi[0]; bi++;
2396       brow  = bs*i;
2397       for (j=0; j<ncols; j++){
2398         bcol = bs*(*bj);
2399         for (kcol=0; kcol<bs; kcol++){
2400           col = bcol + kcol;                 /* local col index */
2401           col += rowners_bs[rank+1];      /* global col index */
2403           for (krow=0; krow<bs; krow++){
2404             atmp = PetscAbsScalar(*ba); ba++;
2405             row = brow + krow;    /* local row index */
2407             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2408             if (work[col] < atmp) work[col] = atmp;
2409           }
2410         }
2411         bj++;
2412       }
2413     }
2419 
2420     /* send values to its owners */
2421     for (dest=rank+1; dest<size; dest++){
2422       svalues = work + rowners_bs[dest];
2423       count   = rowners_bs[dest+1]-rowners_bs[dest];
2424       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2429     }
2430   }
2431 
2432   /* receive values */
2433   if (rank){
2434     rvalues = work;
2435     count   = rowners_bs[rank+1]-rowners_bs[rank];
2436     for (source=0; source<rank; source++){
2437       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2438       /* process values */
2439       for (i=0; i<count; i++){
2440         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2441       }
2446     }
2447   }
2448 
2449   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2450   ierr = PetscFree(work);CHKERRQ(ierr);
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 #undef __FUNCT__
2455 #define __FUNCT__ "MatRelax_MPISBAIJ"
2456 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2457 {
2458   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2459   int            ierr,mbs=mat->mbs,bs=mat->bs;
2460   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2461   Vec            bb1;
2462 
2463   PetscFunctionBegin;
2464   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2465   if (bs > 1)
2466     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2467 
2468   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2469     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2470       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2471       its--;
2472     }
2473 
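    /* each sweep removes the off-diagonal contributions, bb1 = bb - B*x - B^T*x,
       using a single forward scatter of the merged vector slvec0 = [-x, -B^T*x]
       into slvec1 = [bb, 0], and then runs the local symmetric SOR on the
       diagonal block with bb1 as the right-hand side */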
2474     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2475     while (its--){
2476 
2477       /* lower triangular part: slvec0b = - B^T*xx */
2478       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2479 
2480       /* copy xx into slvec0a */
2481       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2482       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2483       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2484       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2485 
2486       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2487 
2488       /* copy bb into slvec1a */
2489       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2490       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2491       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2492       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2493 
2494       /* set slvec1b = 0 */
2495       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2496 
2497       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2498       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2499       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2500       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2501 
2502       /* upper triangular part: bb1 = slvec1a + B*slvec1b (= bb - B^T*x - B*x) */
2503       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2504 
2505       /* local diagonal sweep */
2506       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2507     }
2508     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2509   } else {
2510     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2511   }
2512   PetscFunctionReturn(0);
2513 }
2514 
2515 #undef __FUNCT__
2516 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2517 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2518 {
2519   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2520   int            ierr;
2521   PetscScalar    mone=-1.0;
2522   Vec            lvec1,bb1;
2523 
2524   PetscFunctionBegin;
2525   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2526   if (mat->bs > 1)
2527     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2528 
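  /* variant of MatRelax_MPISBAIJ above that instead uses the ordinary Mvctx
     scatter twice per sweep: forward to obtain the ghost values of xx, and
     reverse to accumulate the -B^T*xx contributions into bb1 */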
2529   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2530     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2531       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2532       its--;
2533     }
2534 
2535     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2536     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2537     while (its--){
2538       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2539 
2540       /* lower triangular part: bb1 = bb - B^T*xx */
2541       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2542       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2543 
2544       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2545       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2546       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2547 
2548       /* upper triangular part: bb1 = bb1 - B*x */
2549       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2550       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2551 
2552       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2553 
2554       /* diagonal sweep */
2555       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2556     }
2557     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2558     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2559   } else {
2560     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2561   }
2562   PetscFunctionReturn(0);
2563 }
2564 
2565