xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 737d121a2b57b182d532aa04ba3c4e4d7f0dd66e)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
3 #include "mpisbaij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
8 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,int,IS[],int);
10 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
11 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
13 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
16 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements that are already in single
26    precision and inserts them directly into the single precision data structure. The function
27    MatSetValuesBlocked_MPISBAIJ() converts the double precision entries into single precision
28    and then calls ..._MatScalar() to put them into the single precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
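
/* A minimal sketch of the conversion-wrapper pattern described above, assuming
   PETSC_USE_MAT_SINGLE is defined (so MatScalar is single precision while
   PetscScalar is double precision): the public routine copies the user's values
   into a MatScalar buffer and then delegates to the ..._MatScalar() routine.
   Illustrative only; the real implementations appear further below. */
#if 0
PetscErrorCode MatSetValues_Wrapper_Sketch(Mat mat,int m,const int im[],int n,const int in[],
                                           const PetscScalar v[],InsertMode addv)
{
  PetscErrorCode ierr;
  int            i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  ierr = PetscMalloc(N*sizeof(MatScalar),&vsingle);CHKERRQ(ierr);
  for (i=0; i<N; i++) vsingle[i] = v[i];  /* double -> single conversion */
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  ierr = PetscFree(vsingle);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif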
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
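
/* Hedged usage sketch for the two routines above: once the nonzero pattern is
   fixed, the numerical values of both local parts (A and B) can be saved and
   later restored, which is useful when the same pattern is refilled many times,
   e.g. inside a nonlinear or time-stepping loop.  `mat' is assumed to be an
   already assembled MPISBAIJ matrix. */
#if 0
PetscErrorCode MatReuseValues_Sketch(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr); /* required by MatStoreValues() */
  ierr = MatStoreValues(mat);CHKERRQ(ierr);    /* snapshot the values of A and B */
  /* ... modify values with MatSetValues() and reassemble ... */
  ierr = MatRetrieveValues(mat);CHKERRQ(ierr); /* roll back to the snapshot */
  PetscFunctionReturn(0);
}
#endif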
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (int*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
153 #ifndef MatSetValues_SeqSBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (int*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
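
/* Both macros above share the same search/insert logic: a truncated binary
   search narrows [low,high) down to at most three candidates, a short linear
   scan finds the block column (or the insertion point), and on a true insert
   every later entry in the row is shifted up by one slot.  A stand-alone
   sketch of that logic on a plain sorted array (no reallocation, illustrative
   only; returns the new row length): */
#if 0
static int InsertSortedCol_Sketch(int *cols,int nrow,int bcol)
{
  int low = 0,high = nrow,t,i,ii;

  while (high-low > 3) {                  /* truncated binary search */
    t = (low+high)/2;
    if (cols[t] > bcol) high = t;
    else                low  = t;
  }
  for (i=low; i<high; i++) {              /* short linear scan */
    if (cols[i] > bcol) break;
    if (cols[i] == bcol) return nrow;     /* column already present */
  }
  for (ii=nrow-1; ii>=i; ii--) cols[ii+1] = cols[ii]; /* shift tail up */
  cols[i] = bcol;
  return nrow+1;
}
#endif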
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
237   PetscErrorCode ierr;
238   int i,N = m*n;
239   MatScalar    *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   int i,N = m*n*b->bs2;
263   MatScalar   *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
281 PetscErrorCode MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
282 {
283   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
284   PetscErrorCode ierr;
285   int i,N = m*n;
286   MatScalar   *vsingle;
287 
288   PetscFunctionBegin;
289   SETERRQ(1,"Function not yet written for SBAIJ format");
290   /* PetscFunctionReturn(0); */
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
295 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
296 {
297   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
298   PetscErrorCode ierr;
299   int i,N = m*n*b->bs2;
300   MatScalar   *vsingle;
301 
302   PetscFunctionBegin;
303   SETERRQ(1,"Function not yet written for SBAIJ format");
304   /* PetscFunctionReturn(0); */
305 }
306 #endif
307 
308 /* Only add/insert a(i,j) with i<=j (block indices).
309    Any a(i,j) with i>j entered by the user is silently ignored.
310 */
311 #undef __FUNCT__
312 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
313 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
314 {
315   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
316   MatScalar    value;
317   PetscTruth   roworiented = baij->roworiented;
318   PetscErrorCode ierr;
319   int i,j,row,col;
320   int          rstart_orig=baij->rstart_bs;
321   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
322   int          cend_orig=baij->cend_bs,bs=baij->bs;
323 
324   /* Some Variables required in the macro */
325   Mat          A = baij->A;
326   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
327   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
328   MatScalar    *aa=a->a;
329 
330   Mat          B = baij->B;
331   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
332   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
333   MatScalar    *ba=b->a;
334 
335   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
336   int          low,high,t,ridx,cidx,bs2=a->bs2;
337   MatScalar    *ap,*bap;
338 
339   /* for stash */
340   int          n_loc, *in_loc=0;
341   MatScalar    *v_loc=0;
342 
343   PetscFunctionBegin;
344 
345   if(!baij->donotstash){
346     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
347     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
348   }
349 
350   for (i=0; i<m; i++) {
351     if (im[i] < 0) continue;
352 #if defined(PETSC_USE_BOPT_g)
353     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
354 #endif
355     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
356       row = im[i] - rstart_orig;              /* local row index */
357       for (j=0; j<n; j++) {
358         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
359         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
360           col = in[j] - cstart_orig;          /* local col index */
361           brow = row/bs; bcol = col/bs;
362           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
363           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
364           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
365           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
366         } else if (in[j] < 0) continue;
367 #if defined(PETSC_USE_BOPT_g)
368         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
369 #endif
370         else {  /* off-diag entry (B) */
371           if (mat->was_assembled) {
372             if (!baij->colmap) {
373               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
374             }
375 #if defined (PETSC_USE_CTABLE)
376             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
377             col  = col - 1;
378 #else
379             col = baij->colmap[in[j]/bs] - 1;
380 #endif
381             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
382               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
383               col =  in[j];
384               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
385               B = baij->B;
386               b = (Mat_SeqBAIJ*)(B)->data;
387               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
388               ba=b->a;
389             } else col += in[j]%bs;
390           } else col = in[j];
391           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
392           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
393           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
394         }
395       }
396     } else {  /* off processor entry */
397       if (!baij->donotstash) {
398         n_loc = 0;
399         for (j=0; j<n; j++){
400           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
401           in_loc[n_loc] = in[j];
402           if (roworiented) {
403             v_loc[n_loc] = v[i*n+j];
404           } else {
405             v_loc[n_loc] = v[j*m+i];
406           }
407           n_loc++;
408         }
409         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
410       }
411     }
412   }
413 
414   if(!baij->donotstash){
415     ierr = PetscFree(in_loc);CHKERRQ(ierr);
416     ierr = PetscFree(v_loc);CHKERRQ(ierr);
417   }
418   PetscFunctionReturn(0);
419 }
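
/* Caller-side consequence of the rule above, as a hedged sketch (bs == 1 is
   assumed so block and point indices coincide): only the upper triangle needs
   to be supplied; lower triangular entries are dropped without warning. */
#if 0
PetscErrorCode SetUpperTriangle_Sketch(Mat mat)
{
  PetscErrorCode ierr;
  int            i = 2,j = 5;
  PetscScalar    v = 1.0;

  PetscFunctionBegin;
  ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr); /* kept: i <= j */
  ierr = MatSetValues(mat,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); /* silently ignored: row > col */
  PetscFunctionReturn(0);
}
#endif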
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
423 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
424 {
425   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
426   const MatScalar *value;
427   MatScalar       *barray=baij->barray;
428   PetscTruth      roworiented = baij->roworiented;
429   PetscErrorCode ierr;
430   int i,j,ii,jj,row,col,rstart=baij->rstart;
431   int             rend=baij->rend,cstart=baij->cstart,stepval;
432   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
433 
434   PetscFunctionBegin;
435   if(!barray) {
436     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
437     baij->barray = barray;
438   }
439 
440   if (roworiented) {
441     stepval = (n-1)*bs;
442   } else {
443     stepval = (m-1)*bs;
444   }
445   for (i=0; i<m; i++) {
446     if (im[i] < 0) continue;
447 #if defined(PETSC_USE_BOPT_g)
448     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
449 #endif
450     if (im[i] >= rstart && im[i] < rend) {
451       row = im[i] - rstart;
452       for (j=0; j<n; j++) {
453         /* If only one block column (row oriented) or one block row (column oriented), a copy is not required */
454         if ((roworiented) && (n == 1)) {
455           barray = (MatScalar*) v + i*bs2;
456         } else if((!roworiented) && (m == 1)) {
457           barray = (MatScalar*) v + j*bs2;
458         } else { /* Here a copy is required */
459           if (roworiented) {
460             value = v + i*(stepval+bs)*bs + j*bs;
461           } else {
462             value = v + j*(stepval+bs)*bs + i*bs;
463           }
464           for (ii=0; ii<bs; ii++,value+=stepval) {
465             for (jj=0; jj<bs; jj++) {
466               *barray++  = *value++;
467             }
468           }
469           barray -=bs2;
470         }
471 
472         if (in[j] >= cstart && in[j] < cend){
473           col  = in[j] - cstart;
474           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
475         }
476         else if (in[j] < 0) continue;
477 #if defined(PETSC_USE_BOPT_g)
478         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
479 #endif
480         else {
481           if (mat->was_assembled) {
482             if (!baij->colmap) {
483               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
484             }
485 
486 #if defined(PETSC_USE_BOPT_g)
487 #if defined (PETSC_USE_CTABLE)
488             { int data;
489               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
490               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
491             }
492 #else
493             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
494 #endif
495 #endif
496 #if defined (PETSC_USE_CTABLE)
497 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
498             col  = (col - 1)/bs;
499 #else
500             col = (baij->colmap[in[j]] - 1)/bs;
501 #endif
502             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
503               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
504               col =  in[j];
505             }
506           }
507           else col = in[j];
508           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
509         }
510       }
511     } else {
512       if (!baij->donotstash) {
513         if (roworiented) {
514           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
515         } else {
516           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
517         }
518       }
519     }
520   }
521   PetscFunctionReturn(0);
522 }
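
/* Hedged usage sketch for the blocked path above: values are passed one
   bs-by-bs block at a time, addressed by block row/column indices (bs == 2 is
   assumed here), and as with the point version only blocks with block-row <=
   block-column are kept. */
#if 0
PetscErrorCode SetOneBlock_Sketch(Mat mat)
{
  PetscErrorCode ierr;
  int            brow = 0,bcol = 3;
  PetscScalar    block[4] = {1.0,2.0,3.0,4.0};  /* one 2x2 block, read row by row under the default row-oriented option */

  PetscFunctionBegin;
  ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif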
523 
524 #define HASH_KEY 0.6180339887
525 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
526 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
527 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
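
/* HASH() above is multiplicative hashing with the golden ratio conjugate:
   the key is multiplied by ~0.618..., the integer part is discarded, and the
   remaining fraction (well spread over [0,1) for consecutive keys) is scaled
   to a slot index in [0,size).  A small stand-alone demonstration: */
#if 0
static int HashDemo_Sketch(int size,int key)
{
  PetscReal tmp;
  return HASH(size,key,tmp);  /* e.g. size=10, key=1 -> tmp=0.618..., slot 6 */
}
#endif
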
528 #undef __FUNCT__
529 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
530 PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
531 {
532   PetscFunctionBegin;
533   SETERRQ(1,"Function not yet written for SBAIJ format");
534   /* PetscFunctionReturn(0); */
535 }
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
539 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
540 {
541   PetscFunctionBegin;
542   SETERRQ(1,"Function not yet written for SBAIJ format");
543   /* PetscFunctionReturn(0); */
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_MPISBAIJ"
548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
549 {
550   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
551   PetscErrorCode ierr;
552   int          bs=baij->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
553   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
554 
555   PetscFunctionBegin;
556   for (i=0; i<m; i++) {
557     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
558     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
559     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
560       row = idxm[i] - bsrstart;
561       for (j=0; j<n; j++) {
562         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
563         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
564         if (idxn[j] >= bscstart && idxn[j] < bscend){
565           col = idxn[j] - bscstart;
566           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567         } else {
568           if (!baij->colmap) {
569             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
570           }
571 #if defined (PETSC_USE_CTABLE)
572           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
573           data --;
574 #else
575           data = baij->colmap[idxn[j]/bs]-1;
576 #endif
577           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
578           else {
579             col  = data + idxn[j]%bs;
580             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581           }
582         }
583       }
584     } else {
585       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
586     }
587   }
588   PetscFunctionReturn(0);
589 }
590 
591 #undef __FUNCT__
592 #define __FUNCT__ "MatNorm_MPISBAIJ"
593 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
594 {
595   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
596   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
597   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
598   PetscErrorCode ierr;
599   PetscReal  sum[2],*lnorm2;
600 
601   PetscFunctionBegin;
602   if (baij->size == 1) {
603     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
604   } else {
605     if (type == NORM_FROBENIUS) {
606       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
607       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
608       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
609       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
610       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
611       /*
612       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
613       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
614       */
615       ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
616       /*
617       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
618       PetscSynchronizedFlush(mat->comm); */
619 
620       *norm = sqrt(sum[0] + 2*sum[1]);
621       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
622     } else {
623       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
624     }
625   }
626   PetscFunctionReturn(0);
627 }
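
/* Why the factor of 2 above: each process stores the diagonal block A and only
   the upper triangular off-diagonal part B, yet every entry of B also stands
   for its transposed counterpart (the SeqSBAIJ norm of A already accounts for
   its own symmetry internally).  Hence ||M||_F^2 = sum_p ||A_p||_F^2 +
   2 * sum_p ||B_p||_F^2.  A hedged single-process analogue of the final
   combination step: */
#if 0
static PetscReal CombineFrobenius_Sketch(PetscReal normA,PetscReal normB)
{
  return sqrt(normA*normA + 2.0*normB*normB);
}
#endif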
628 
629 /*
630   Creates the hash table and fills in its entries.
631   The table is created only once.
632   If new entries need to be added to the matrix,
633   then the hash table has to be destroyed and
634   recreated.
635 */
636 #undef __FUNCT__
637 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
638 PetscErrorCode MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
639 {
640   PetscFunctionBegin;
641   SETERRQ(1,"Function not yet written for SBAIJ format");
642   /* PetscFunctionReturn(0); */
643 }
644 
645 #undef __FUNCT__
646 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
647 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
648 {
649   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
650   PetscErrorCode ierr;
651   int nstash,reallocs;
652   InsertMode  addv;
653 
654   PetscFunctionBegin;
655   if (baij->donotstash) {
656     PetscFunctionReturn(0);
657   }
658 
659   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES, not a mix */
660   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
661   if (addv == (ADD_VALUES|INSERT_VALUES)) {
662     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
663   }
664   mat->insertmode = addv; /* in case this processor had no cache */
665 
666   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
667   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
668   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
669   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
670   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
671   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
672   PetscFunctionReturn(0);
673 }
674 
675 #undef __FUNCT__
676 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
677 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
678 {
679   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
680   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
681   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
682   PetscErrorCode ierr;
683   int         i,j,rstart,ncols,n,flg,bs2=baij->bs2;
684   int         *row,*col,other_disassembled;
685   PetscTruth  r1,r2,r3;
686   MatScalar   *val;
687   InsertMode  addv = mat->insertmode;
688 
689   PetscFunctionBegin;
690 
691   if (!baij->donotstash) {
692     while (1) {
693       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
694       /*
695       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
696       PetscSynchronizedFlush(mat->comm);
697       */
698       if (!flg) break;
699 
700       for (i=0; i<n;) {
701         /* Now identify the consecutive vals belonging to the same row */
702         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
703         if (j < n) ncols = j-i;
704         else       ncols = n-i;
705         /* Now assemble all these values with a single function call */
706         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
707         i = j;
708       }
709     }
710     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
711     /* Now process the block-stash. Since the values are stashed column-oriented,
712        temporarily clear the row-oriented flags, and after the MatSetValues()
713        calls restore the original flags */
714     r1 = baij->roworiented;
715     r2 = a->roworiented;
716     r3 = b->roworiented;
717     baij->roworiented = PETSC_FALSE;
718     a->roworiented    = PETSC_FALSE;
719     b->roworiented    = PETSC_FALSE;
720     while (1) {
721       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
722       if (!flg) break;
723 
724       for (i=0; i<n;) {
725         /* Now identify the consecutive vals belonging to the same row */
726         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
727         if (j < n) ncols = j-i;
728         else       ncols = n-i;
729         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
730         i = j;
731       }
732     }
733     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
734     baij->roworiented = r1;
735     a->roworiented    = r2;
736     b->roworiented    = r3;
737   }
738 
739   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
740   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
741 
742   /* determine if any processor has disassembled, if so we must
743      also disassemble ourselves, so that we may reassemble. */
744   /*
745      if nonzero structure of submatrix B cannot change then we know that
746      no processor disassembled thus we can skip this stuff
747   */
748   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
749     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
750     if (mat->was_assembled && !other_disassembled) {
751       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
752     }
753   }
754 
755   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
756     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
757   }
758   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
759   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
760 
761 #if defined(PETSC_USE_BOPT_g)
762   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
763     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
764     baij->ht_total_ct  = 0;
765     baij->ht_insert_ct = 0;
766   }
767 #endif
768   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
769     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
770     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
771     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
772   }
773 
774   if (baij->rowvalues) {
775     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
776     baij->rowvalues = 0;
777   }
778 
779   PetscFunctionReturn(0);
780 }
781 
782 #undef __FUNCT__
783 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
784 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
785 {
786   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
787   PetscErrorCode ierr;
788   int bs = baij->bs,size = baij->size,rank = baij->rank;
789   PetscTruth        iascii,isdraw;
790   PetscViewer       sviewer;
791   PetscViewerFormat format;
792 
793   PetscFunctionBegin;
794   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
795   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
796   if (iascii) {
797     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
798     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
799       MatInfo info;
800       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
801       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
802       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
803               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
804               baij->bs,(int)info.memory);CHKERRQ(ierr);
805       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
806       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
807       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
808       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
809       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
810       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
811       PetscFunctionReturn(0);
812     } else if (format == PETSC_VIEWER_ASCII_INFO) {
813       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
814       PetscFunctionReturn(0);
815     }
816   }
817 
818   if (isdraw) {
819     PetscDraw       draw;
820     PetscTruth isnull;
821     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
822     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
823   }
824 
825   if (size == 1) {
826     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
827     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
828   } else {
829     /* assemble the entire matrix onto the first processor. */
830     Mat         A;
831     Mat_SeqSBAIJ *Aloc;
832     Mat_SeqBAIJ *Bloc;
833     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
834     MatScalar   *a;
835 
836     /* Should this be the same type as mat? */
837     if (!rank) {
838       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
839     } else {
840       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
841     }
842     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
843     ierr = MatMPISBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
844     PetscLogObjectParent(mat,A);
845 
846     /* copy over the A part */
847     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
848     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
849     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
850 
851     for (i=0; i<mbs; i++) {
852       rvals[0] = bs*(baij->rstart + i);
853       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
854       for (j=ai[i]; j<ai[i+1]; j++) {
855         col = (baij->cstart+aj[j])*bs;
856         for (k=0; k<bs; k++) {
857           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
858           col++; a += bs;
859         }
860       }
861     }
862     /* copy over the B part */
863     Bloc = (Mat_SeqBAIJ*)baij->B->data;
864     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
865     for (i=0; i<mbs; i++) {
866       rvals[0] = bs*(baij->rstart + i);
867       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
868       for (j=ai[i]; j<ai[i+1]; j++) {
869         col = baij->garray[aj[j]]*bs;
870         for (k=0; k<bs; k++) {
871           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
872           col++; a += bs;
873         }
874       }
875     }
876     ierr = PetscFree(rvals);CHKERRQ(ierr);
877     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
878     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
879     /*
880        Everyone has to call to draw the matrix since the graphics waits are
881        synchronized across all processors that share the PetscDraw object
882     */
883     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
884     if (!rank) {
885       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
886       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
887     }
888     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
889     ierr = MatDestroy(A);CHKERRQ(ierr);
890   }
891   PetscFunctionReturn(0);
892 }
893 
894 #undef __FUNCT__
895 #define __FUNCT__ "MatView_MPISBAIJ"
896 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
897 {
898   PetscErrorCode ierr;
899   PetscTruth iascii,isdraw,issocket,isbinary;
900 
901   PetscFunctionBegin;
902   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
903   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
904   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
905   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
906   if (iascii || isdraw || issocket || isbinary) {
907     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
908   } else {
909     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
910   }
911   PetscFunctionReturn(0);
912 }
913 
914 #undef __FUNCT__
915 #define __FUNCT__ "MatDestroy_MPISBAIJ"
916 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
917 {
918   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
919   PetscErrorCode ierr;
920 
921   PetscFunctionBegin;
922 #if defined(PETSC_USE_LOG)
923   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
924 #endif
925   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
926   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
927   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
928   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
929   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
930 #if defined (PETSC_USE_CTABLE)
931   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
932 #else
933   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
934 #endif
935   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
936   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
937   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
938   if (baij->slvec0) {
939     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
940     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
941   }
942   if (baij->slvec1) {
943     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
944     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
945     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
946   }
947   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
948   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
949   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
950   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
951 #if defined(PETSC_USE_MAT_SINGLE)
952   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
953 #endif
954   ierr = PetscFree(baij);CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 
958 #undef __FUNCT__
959 #define __FUNCT__ "MatMult_MPISBAIJ"
960 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
961 {
962   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
963   PetscErrorCode ierr;
964   int nt,mbs=a->mbs,bs=a->bs;
965   PetscScalar *x,*from,zero=0.0;
966 
967   PetscFunctionBegin;
968   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
969   if (nt != A->n) {
970     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
971   }
972   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
973   if (nt != A->m) {
974     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
975   }
976 
977   /* diagonal part */
978   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
979   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
980 
981   /* subdiagonal part */
982   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
983 
984   /* copy x into the vec slvec0 */
985   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
986   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
987   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
988   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
989 
990   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
991   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
992   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
993 
994   /* superdiagonal part */
995   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
996 
997   PetscFunctionReturn(0);
998 }
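
/* Structure of the multiply above, for a matrix stored as D (diagonal blocks,
   in a->A) plus U (strictly upper off-diagonal part, in a->B): y = D x is done
   locally, t = U^T x is computed into slvec0b, a single scatter both adds the
   U^T x contributions to their owners and brings in the ghost values of x, and
   finally the local U multiply is added on top.  A dense single-process
   analogue (illustrative only; n-by-n, D simplified to a pure diagonal, U a
   strict upper triangle): */
#if 0
static void SymmetricMult_Sketch(int n,const PetscScalar *D,const PetscScalar *U,
                                 const PetscScalar *x,PetscScalar *y)
{
  int i,j;
  for (i=0; i<n; i++) y[i] = D[i]*x[i];  /* diagonal part */
  for (i=0; i<n; i++) {
    for (j=i+1; j<n; j++) {
      y[i] += U[i*n+j]*x[j];             /* superdiagonal part */
      y[j] += U[i*n+j]*x[i];             /* subdiagonal part (U^T) */
    }
  }
}
#endif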
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1002 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1003 {
1004   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1005   PetscErrorCode ierr;
1006   int nt;
1007 
1008   PetscFunctionBegin;
1009   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1010   if (nt != A->n) {
1011     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1012   }
1013   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1014   if (nt != A->m) {
1015     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1016   }
1017 
1018   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1019   /* do diagonal part */
1020   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1021   /* do superdiagonal part */
1022   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1023   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1024   /* do subdiagonal part */
1025   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1026   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1027   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1028 
1029   PetscFunctionReturn(0);
1030 }
1031 
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1034 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1035 {
1036   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1037   PetscErrorCode ierr;
1038   int mbs=a->mbs,bs=a->bs;
1039   PetscScalar  *x,*from,zero=0.0;
1040 
1041   PetscFunctionBegin;
1042   /*
1043   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1044   PetscSynchronizedFlush(A->comm);
1045   */
1046   /* diagonal part */
1047   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1048   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1049 
1050   /* subdiagonal part */
1051   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1052 
1053   /* copy x into the vec slvec0 */
1054   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1055   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1056   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1057   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1058 
1059   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1060   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1061   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1062 
1063   /* superdiagonal part */
1064   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1065 
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1071 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1078   /* do diagonal part */
1079   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1080   /* do superdiagonal part */
1081   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1082   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1083 
1084   /* do subdiagonal part */
1085   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1086   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1087   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1088 
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1094 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1095 {
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1105 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1106 {
1107   PetscErrorCode ierr;
1108 
1109   PetscFunctionBegin;
1110   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 /*
1115   This only works correctly for square matrices where the subblock A->A is the
1116    diagonal block
1117 */
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1120 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1121 {
1122   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1127   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 #undef __FUNCT__
1132 #define __FUNCT__ "MatScale_MPISBAIJ"
1133 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1134 {
1135   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1136   PetscErrorCode ierr;
1137 
1138   PetscFunctionBegin;
1139   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1140   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNCT__
1145 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1146 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1147 {
1148   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1149   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1150   PetscErrorCode ierr;
1151   int            bs = mat->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1152   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1153   int            *cmap,*idx_p,cstart = mat->cstart;
1154 
1155   PetscFunctionBegin;
1156   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1157   mat->getrowactive = PETSC_TRUE;
1158 
1159   if (!mat->rowvalues && (idx || v)) {
1160     /*
1161         allocate enough space to hold information from the longest row.
1162     */
1163     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1164     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1165     int     max = 1,mbs = mat->mbs,tmp;
1166     for (i=0; i<mbs; i++) {
1167       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1168       if (max < tmp) { max = tmp; }
1169     }
1170     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1171     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1172   }
1173 
1174   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1175   lrow = row - brstart;  /* local row index */
1176 
1177   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1178   if (!v)   {pvA = 0; pvB = 0;}
1179   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1180   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1181   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1182   nztot = nzA + nzB;
1183 
1184   cmap  = mat->garray;
1185   if (v  || idx) {
1186     if (nztot) {
1187       /* Sort by increasing column numbers, assuming A and B already sorted */
1188       int imark = -1;
1189       if (v) {
1190         *v = v_p = mat->rowvalues;
1191         for (i=0; i<nzB; i++) {
1192           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1193           else break;
1194         }
1195         imark = i;
1196         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1197         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1198       }
1199       if (idx) {
1200         *idx = idx_p = mat->rowindices;
1201         if (imark > -1) {
1202           for (i=0; i<imark; i++) {
1203             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1204           }
1205         } else {
1206           for (i=0; i<nzB; i++) {
1207             if (cmap[cworkB[i]/bs] < cstart)
1208               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1209             else break;
1210           }
1211           imark = i;
1212         }
1213         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1214         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1215       }
1216     } else {
1217       if (idx) *idx = 0;
1218       if (v)   *v   = 0;
1219     }
1220   }
1221   *nz = nztot;
1222   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1223   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
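
/* The row returned above is the concatenation of three already-sorted pieces:
   the B (off-diagonal) columns that map below cstart, then the A (diagonal
   block) columns, then the remaining B columns.  A stand-alone sketch of that
   merge for plain index arrays, assuming bs == 1 so cmapB[] plays the role of
   cmap[cworkB[i]] (returns the total row length): */
#if 0
static int MergeRow_Sketch(int nzA,const int *colsA,int nzB,const int *cmapB,
                           int cstart,int *out)
{
  int i,imark;
  for (i=0; i<nzB; i++) { if (cmapB[i] >= cstart) break; out[i] = cmapB[i]; }
  imark = i;                                         /* B columns left of the diagonal block */
  for (i=0; i<nzA; i++)     out[imark+i] = cstart + colsA[i];
  for (i=imark; i<nzB; i++) out[nzA+i]   = cmapB[i]; /* B columns right of it */
  return nzA + nzB;
}
#endif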
1226 
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1229 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1230 {
1231   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1232 
1233   PetscFunctionBegin;
1234   if (baij->getrowactive == PETSC_FALSE) {
1235     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1236   }
1237   baij->getrowactive = PETSC_FALSE;
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1243 PetscErrorCode MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1244 {
1245   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1246 
1247   PetscFunctionBegin;
1248   *bs = baij->bs;
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1254 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1255 {
1256   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1261   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1267 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1268 {
1269   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1270   Mat         A = a->A,B = a->B;
1271   PetscErrorCode ierr;
1272   PetscReal   isend[5],irecv[5];
1273 
1274   PetscFunctionBegin;
1275   info->block_size     = (PetscReal)a->bs;
1276   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1277   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1278   isend[3] = info->memory;  isend[4] = info->mallocs;
1279   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1280   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1281   isend[3] += info->memory;  isend[4] += info->mallocs;
1282   if (flag == MAT_LOCAL) {
1283     info->nz_used      = isend[0];
1284     info->nz_allocated = isend[1];
1285     info->nz_unneeded  = isend[2];
1286     info->memory       = isend[3];
1287     info->mallocs      = isend[4];
1288   } else if (flag == MAT_GLOBAL_MAX) {
1289     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1290     info->nz_used      = irecv[0];
1291     info->nz_allocated = irecv[1];
1292     info->nz_unneeded  = irecv[2];
1293     info->memory       = irecv[3];
1294     info->mallocs      = irecv[4];
1295   } else if (flag == MAT_GLOBAL_SUM) {
1296     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1297     info->nz_used      = irecv[0];
1298     info->nz_allocated = irecv[1];
1299     info->nz_unneeded  = irecv[2];
1300     info->memory       = irecv[3];
1301     info->mallocs      = irecv[4];
1302   } else {
1303     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1304   }
1305   info->rows_global       = (PetscReal)matin->M;
1306   info->columns_global    = (PetscReal)matin->N;
1307   info->rows_local        = (PetscReal)matin->m;
1308   info->columns_local     = (PetscReal)matin->n;
1309   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1310   info->fill_ratio_needed = 0;
1311   info->factor_mallocs    = 0;
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 #undef __FUNCT__
1316 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1317 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1318 {
1319   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1320   PetscErrorCode ierr;
1321 
1322   PetscFunctionBegin;
1323   switch (op) {
1324   case MAT_NO_NEW_NONZERO_LOCATIONS:
1325   case MAT_YES_NEW_NONZERO_LOCATIONS:
1326   case MAT_COLUMNS_UNSORTED:
1327   case MAT_COLUMNS_SORTED:
1328   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1329   case MAT_KEEP_ZEROED_ROWS:
1330   case MAT_NEW_NONZERO_LOCATION_ERR:
1331     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1332     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1333     break;
1334   case MAT_ROW_ORIENTED:
1335     a->roworiented = PETSC_TRUE;
1336     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1337     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1338     break;
1339   case MAT_ROWS_SORTED:
1340   case MAT_ROWS_UNSORTED:
1341   case MAT_YES_NEW_DIAGONALS:
1342     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1343     break;
1344   case MAT_COLUMN_ORIENTED:
1345     a->roworiented = PETSC_FALSE;
1346     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1347     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1348     break;
1349   case MAT_IGNORE_OFF_PROC_ENTRIES:
1350     a->donotstash = PETSC_TRUE;
1351     break;
1352   case MAT_NO_NEW_DIAGONALS:
1353     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1354   case MAT_USE_HASH_TABLE:
1355     a->ht_flag = PETSC_TRUE;
1356     break;
1357   case MAT_NOT_SYMMETRIC:
1358   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1359   case MAT_HERMITIAN:
1360     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1361   case MAT_SYMMETRIC:
1362   case MAT_STRUCTURALLY_SYMMETRIC:
1363   case MAT_NOT_HERMITIAN:
1364   case MAT_SYMMETRY_ETERNAL:
1365   case MAT_NOT_SYMMETRY_ETERNAL:
1366     break;
1367   default:
1368     SETERRQ(PETSC_ERR_SUP,"unknown option");
1369   }
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1375 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1376 {
1377   PetscErrorCode ierr;
1378   PetscFunctionBegin;
1379   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1385 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1386 {
1387   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1388   Mat         a = baij->A,b = baij->B;
1389   PetscErrorCode ierr;
1390   int s1,s2,s3;
1391 
1392   PetscFunctionBegin;
1393   if (ll != rr) {
1394     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1395   }
1396   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1397   if (rr) {
1398     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1399     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1400     /* Overlap communication with computation: start gathering the ghost values of rr */
1401     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1402     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1403     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1404     /* left scale the off-diagonal block */
1405     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1406     /* scale the diagonal block */
1407     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1408     /* finish the scatter, then right scale the off-diagonal block */
1409     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1410     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1411   }
1415 
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1421 PetscErrorCode MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1422 {
1423   PetscFunctionBegin;
1424   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1429 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1430 {
1431   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1432   MPI_Comm    comm = A->comm;
1433   static int  called = 0;
1434   PetscErrorCode ierr;
1435 
1436   PetscFunctionBegin;
1437   if (!a->rank) {
1438     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1439   }
1440   if (called) {PetscFunctionReturn(0);} else called = 1;
1441   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1442   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1448 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1449 {
1450   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1451   PetscErrorCode ierr;
1452 
1453   PetscFunctionBegin;
1454   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1459 
1460 #undef __FUNCT__
1461 #define __FUNCT__ "MatEqual_MPISBAIJ"
1462 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1463 {
1464   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1465   Mat         a,b,c,d;
1466   PetscTruth  flg;
1467   PetscErrorCode ierr;
1468 
1469   PetscFunctionBegin;
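       /* Two MPISBAIJ matrices are equal only if both the diagonal and off-diagonal blocks
          are equal on every process; combine the local results with a logical AND. */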
1470   a = matA->A; b = matA->B;
1471   c = matB->A; d = matB->B;
1472 
1473   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1474   if (flg == PETSC_TRUE) {
1475     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1476   }
1477   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 #undef __FUNCT__
1482 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1483 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1484 {
1485   PetscErrorCode ierr;
1486 
1487   PetscFunctionBegin;
1488   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1489   PetscFunctionReturn(0);
1490 }
1491 
1492 #undef __FUNCT__
1493 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1494 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1495 {
1496   PetscErrorCode ierr;
1497   int        i;
1498   PetscTruth flg;
1499 
1500   PetscFunctionBegin;
1501   for (i=0; i<n; i++) {
1502     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1503     if (!flg) {
1504       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrices (irow[i] must equal icol[i]) for MPISBAIJ matrices");
1505     }
1506   }
1507   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 
1512 /* -------------------------------------------------------------------*/
1513 static struct _MatOps MatOps_Values = {
1514        MatSetValues_MPISBAIJ,
1515        MatGetRow_MPISBAIJ,
1516        MatRestoreRow_MPISBAIJ,
1517        MatMult_MPISBAIJ,
1518 /* 4*/ MatMultAdd_MPISBAIJ,
1519        MatMultTranspose_MPISBAIJ,
1520        MatMultTransposeAdd_MPISBAIJ,
1521        0,
1522        0,
1523        0,
1524 /*10*/ 0,
1525        0,
1526        0,
1527        MatRelax_MPISBAIJ,
1528        MatTranspose_MPISBAIJ,
1529 /*15*/ MatGetInfo_MPISBAIJ,
1530        MatEqual_MPISBAIJ,
1531        MatGetDiagonal_MPISBAIJ,
1532        MatDiagonalScale_MPISBAIJ,
1533        MatNorm_MPISBAIJ,
1534 /*20*/ MatAssemblyBegin_MPISBAIJ,
1535        MatAssemblyEnd_MPISBAIJ,
1536        0,
1537        MatSetOption_MPISBAIJ,
1538        MatZeroEntries_MPISBAIJ,
1539 /*25*/ MatZeroRows_MPISBAIJ,
1540        0,
1541        0,
1542        0,
1543        0,
1544 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1545        0,
1546        0,
1547        0,
1548        0,
1549 /*35*/ MatDuplicate_MPISBAIJ,
1550        0,
1551        0,
1552        0,
1553        0,
1554 /*40*/ 0,
1555        MatGetSubMatrices_MPISBAIJ,
1556        MatIncreaseOverlap_MPISBAIJ,
1557        MatGetValues_MPISBAIJ,
1558        0,
1559 /*45*/ MatPrintHelp_MPISBAIJ,
1560        MatScale_MPISBAIJ,
1561        0,
1562        0,
1563        0,
1564 /*50*/ MatGetBlockSize_MPISBAIJ,
1565        0,
1566        0,
1567        0,
1568        0,
1569 /*55*/ 0,
1570        0,
1571        MatSetUnfactored_MPISBAIJ,
1572        0,
1573        MatSetValuesBlocked_MPISBAIJ,
1574 /*60*/ 0,
1575        0,
1576        0,
1577        MatGetPetscMaps_Petsc,
1578        0,
1579 /*65*/ 0,
1580        0,
1581        0,
1582        0,
1583        0,
1584 /*70*/ MatGetRowMax_MPISBAIJ,
1585        0,
1586        0,
1587        0,
1588        0,
1589 /*75*/ 0,
1590        0,
1591        0,
1592        0,
1593        0,
1594 /*80*/ 0,
1595        0,
1596        0,
1597        0,
1598        MatLoad_MPISBAIJ,
1599 /*85*/ 0,
1600        0,
1601        0,
1602        0,
1603        0,
1604 /*90*/ 0,
1605        0,
1606        0,
1607        0,
1608        0,
1609 /*95*/ 0,
1610        0,
1611        0,
1612        0};
1613 
1614 
1615 EXTERN_C_BEGIN
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1618 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1619 {
1620   PetscFunctionBegin;
1621   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1622   *iscopy = PETSC_FALSE;
1623   PetscFunctionReturn(0);
1624 }
1625 EXTERN_C_END
1626 
1627 EXTERN_C_BEGIN
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1630 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1631 {
1632   Mat_MPISBAIJ *b;
1633   PetscErrorCode ierr;
1634   int i,mbs,Mbs;
1635 
1636   PetscFunctionBegin;
1637   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1638 
1639   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1640   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1641   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1642   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1643   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1644   if (d_nnz) {
1645     for (i=0; i<B->m/bs; i++) {
1646       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1647     }
1648   }
1649   if (o_nnz) {
1650     for (i=0; i<B->m/bs; i++) {
1651       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1652     }
1653   }
1654   B->preallocated = PETSC_TRUE;
1655   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1656   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1657   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1658   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1659 
1660   b   = (Mat_MPISBAIJ*)B->data;
1661   mbs = B->m/bs;
1662   Mbs = B->M/bs;
1663   if (mbs*bs != B->m) {
1664     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by block size %d",B->m,bs);
1665   }
1666 
1667   b->bs  = bs;
1668   b->bs2 = bs*bs;
1669   b->mbs = mbs;
1670   b->nbs = mbs;
1671   b->Mbs = Mbs;
1672   b->Nbs = Mbs;
1673 
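       /* rowners[] becomes the prefix sum of the number of block rows owned by each process
          (rowners[rank] .. rowners[rank+1] delimit this process's block rows); the *_bs
          variants hold the same ownership ranges in unblocked (point) indices. */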
1674   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1675   b->rowners[0]    = 0;
1676   for (i=2; i<=b->size; i++) {
1677     b->rowners[i] += b->rowners[i-1];
1678   }
1679   b->rstart    = b->rowners[b->rank];
1680   b->rend      = b->rowners[b->rank+1];
1681   b->cstart    = b->rstart;
1682   b->cend      = b->rend;
1683   for (i=0; i<=b->size; i++) {
1684     b->rowners_bs[i] = b->rowners[i]*bs;
1685   }
1686   b->rstart_bs = b->rstart*bs;
1687   b->rend_bs   = b->rend*bs;
1688 
1689   b->cstart_bs = b->cstart*bs;
1690   b->cend_bs   = b->cend*bs;
1691 
1692   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1693   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1694   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1695   PetscLogObjectParent(B,b->A);
1696 
1697   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1698   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1699   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1700   PetscLogObjectParent(B,b->B);
1701 
1702   /* build cache for off array entries formed */
1703   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1704 
1705   PetscFunctionReturn(0);
1706 }
1707 EXTERN_C_END
1708 
1709 /*MC
1710    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1711    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1712 
1713    Options Database Keys:
1714 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1715 
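        Example (a minimal sketch; the communicator, sizes, block size, and preallocation
        values are illustrative):
     .vb
        Mat A;
        MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);
        MatSetType(A,MATMPISBAIJ);
        MatMPISBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);
     .ve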
1716   Level: beginner
1717 
1718 .seealso: MatCreateMPISBAIJ
1719 M*/
1720 
1721 EXTERN_C_BEGIN
1722 #undef __FUNCT__
1723 #define __FUNCT__ "MatCreate_MPISBAIJ"
1724 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1725 {
1726   Mat_MPISBAIJ *b;
1727   PetscErrorCode ierr;
1728   PetscTruth   flg;
1729 
1730   PetscFunctionBegin;
1731 
1732   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1733   B->data = (void*)b;
1734   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1735   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1736 
1737   B->ops->destroy    = MatDestroy_MPISBAIJ;
1738   B->ops->view       = MatView_MPISBAIJ;
1739   B->mapping    = 0;
1740   B->factor     = 0;
1741   B->assembled  = PETSC_FALSE;
1742 
1743   B->insertmode = NOT_SET_VALUES;
1744   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1745   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1746 
1747   /* build local table of row and column ownerships */
1748   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1749   b->cowners    = b->rowners + b->size + 2;
1750   b->rowners_bs = b->cowners + b->size + 2;
1751   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1752 
1753   /* build cache for off array entries formed */
1754   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1755   b->donotstash  = PETSC_FALSE;
1756   b->colmap      = PETSC_NULL;
1757   b->garray      = PETSC_NULL;
1758   b->roworiented = PETSC_TRUE;
1759 
1760 #if defined(PETSC_USE_MAT_SINGLE)
1761   /* stuff for MatSetValues_XXX in single precision */
1762   b->setvalueslen     = 0;
1763   b->setvaluescopy    = PETSC_NULL;
1764 #endif
1765 
1766   /* stuff used in block assembly */
1767   b->barray       = 0;
1768 
1769   /* stuff used for matrix vector multiply */
1770   b->lvec         = 0;
1771   b->Mvctx        = 0;
1772   b->slvec0       = 0;
1773   b->slvec0b      = 0;
1774   b->slvec1       = 0;
1775   b->slvec1a      = 0;
1776   b->slvec1b      = 0;
1777   b->sMvctx       = 0;
1778 
1779   /* stuff for MatGetRow() */
1780   b->rowindices   = 0;
1781   b->rowvalues    = 0;
1782   b->getrowactive = PETSC_FALSE;
1783 
1784   /* hash table stuff */
1785   b->ht           = 0;
1786   b->hd           = 0;
1787   b->ht_size      = 0;
1788   b->ht_flag      = PETSC_FALSE;
1789   b->ht_fact      = 0;
1790   b->ht_total_ct  = 0;
1791   b->ht_insert_ct = 0;
1792 
1793   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1794   if (flg) {
1795     PetscReal fact = 1.39;
1796     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1797     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1798     if (fact <= 1.0) fact = 1.39;
1799     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1800     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1801   }
1802   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1803                                      "MatStoreValues_MPISBAIJ",
1804                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1805   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1806                                      "MatRetrieveValues_MPISBAIJ",
1807                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1808   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1809                                      "MatGetDiagonalBlock_MPISBAIJ",
1810                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1811   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1812                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1813                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1814   B->symmetric                  = PETSC_TRUE;
1815   B->structurally_symmetric     = PETSC_TRUE;
1816   B->symmetric_set              = PETSC_TRUE;
1817   B->structurally_symmetric_set = PETSC_TRUE;
1818   PetscFunctionReturn(0);
1819 }
1820 EXTERN_C_END
1821 
1822 /*MC
1823    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1824 
1825    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1826    and MATMPISBAIJ otherwise.
1827 
1828    Options Database Keys:
1829 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1830 
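        Example (a minimal sketch; the communicator and sizes are illustrative):
     .vb
        Mat A;
        MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);
        MatSetType(A,MATSBAIJ);  /* becomes MATSEQSBAIJ on one process, MATMPISBAIJ otherwise */
     .ve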
1831   Level: beginner
1832 
1833 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1834 M*/
1835 
1836 EXTERN_C_BEGIN
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatCreate_SBAIJ"
1839 PetscErrorCode MatCreate_SBAIJ(Mat A)
1840 {
1841   PetscErrorCode ierr;
1842   int size;
1843 
1844   PetscFunctionBegin;
1845   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1846   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1847   if (size == 1) {
1848     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1849   } else {
1850     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1851   }
1852   PetscFunctionReturn(0);
1853 }
1854 EXTERN_C_END
1855 
1856 #undef __FUNCT__
1857 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1858 /*@C
1859    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1860    the user should preallocate the matrix storage by setting the parameters
1861    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1862    performance can be increased by more than a factor of 50.
1863 
1864    Collective on Mat
1865 
1866    Input Parameters:
1867 +  A - the matrix
1868 .  bs   - size of block
1869 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1870            submatrix  (same for all local rows)
1871 .  d_nnz - array containing the number of block nonzeros in the various block rows
1872            in the upper triangular and diagonal parts of the diagonal portion of the
1873            local submatrix (possibly different for each block row) or PETSC_NULL.
1874            You must leave room for the diagonal entry even if it is zero.
1875 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1876            submatrix (same for all local rows).
1877 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1878            off-diagonal portion of the local submatrix (possibly different for
1879            each block row) or PETSC_NULL.
1880 
1881 
1882    Options Database Keys:
1883 .   -mat_no_unroll - uses code that does not unroll the loops in the
1884                      block calculations (much slower)
1885 .   -mat_block_size - size of the blocks to use
1886 
1887    Notes:
1888 
1889    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1890    then it must be used on all processors that share the object for that argument.
1891 
1892    Storage Information:
1893    For a square global matrix we define each processor's diagonal portion
1894    to be its local rows and the corresponding columns (a square submatrix);
1895    each processor's off-diagonal portion encompasses the remainder of the
1896    local matrix (a rectangular submatrix).
1897 
1898    The user can specify preallocated storage for the diagonal part of
1899    the local submatrix with either d_nz or d_nnz (not both).  Set
1900    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1901    memory allocation.  Likewise, specify preallocated storage for the
1902    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1903 
1904    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1905    the figure below we depict these three local rows and all columns (0-11).
1906 
1907 .vb
1908            0 1 2 3 4 5 6 7 8 9 10 11
1909           -------------------
1910    row 3  |  o o o d d d o o o o o o
1911    row 4  |  o o o d d d o o o o o o
1912    row 5  |  o o o d d d o o o o o o
1913           -------------------
1914 .ve
1915 
1916    Thus, any entries in the d locations are stored in the d (diagonal)
1917    submatrix, and any entries in the o locations are stored in the
1918    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1919    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1920 
1921    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1922    plus the diagonal part of the d matrix,
1923    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1924    In general, for PDE problems in which most nonzeros are near the diagonal,
1925    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1926    or you will get TERRIBLE performance; see the users' manual chapter on
1927    matrices.
1928 
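        Example (a minimal sketch; the block size and nnz values are illustrative, not a
        recommendation):
     .vb
        /* A is a MATMPISBAIJ matrix; block size 2, at most 5 block nonzeros per block row
           in the upper triangular part of the diagonal portion and 2 in the off-diagonal
           portion */
        MatMPISBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);
     .ve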
1929    Level: intermediate
1930 
1931 .keywords: matrix, block, aij, compressed row, sparse, parallel
1932 
1933 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1934 @*/
1935 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1936 {
1937   PetscErrorCode ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1938 
1939   PetscFunctionBegin;
1940   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1941   if (f) {
1942     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1943   }
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatCreateMPISBAIJ"
1949 /*@C
1950    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1951    (block compressed row).  For good matrix assembly performance
1952    the user should preallocate the matrix storage by setting the parameters
1953    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1954    performance can be increased by more than a factor of 50.
1955 
1956    Collective on MPI_Comm
1957 
1958    Input Parameters:
1959 +  comm - MPI communicator
1960 .  bs   - size of block
1961 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1962            This value should be the same as the local size used in creating the
1963            y vector for the matrix-vector product y = Ax.
1964 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1965            This value should be the same as the local size used in creating the
1966            x vector for the matrix-vector product y = Ax.
1967 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1968 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1969 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1970            submatrix  (same for all local rows)
1971 .  d_nnz - array containing the number of block nonzeros in the various block rows
1972            in the upper triangular portion of the diagonal portion of the local
1973            submatrix (possibly different for each block row) or PETSC_NULL.
1974            You must leave room for the diagonal entry even if it is zero.
1975 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1976            submatrix (same for all local rows).
1977 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1978            off-diagonal portion of the local submatrix (possibly different for
1979            each block row) or PETSC_NULL.
1980 
1981    Output Parameter:
1982 .  A - the matrix
1983 
1984    Options Database Keys:
1985 .   -mat_no_unroll - uses code that does not unroll the loops in the
1986                      block calculations (much slower)
1987 .   -mat_block_size - size of the blocks to use
1988 .   -mat_mpi - use the parallel matrix data structures even on one processor
1989                (defaults to using SeqBAIJ format on one processor)
1990 
1991    Notes:
1992    The user MUST specify either the local or global matrix dimensions
1993    (possibly both).
1994 
1995    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1996    then it must be used on all processors that share the object for that argument.
1997 
1998    Storage Information:
1999    For a square global matrix we define each processor's diagonal portion
2000    to be its local rows and the corresponding columns (a square submatrix);
2001    each processor's off-diagonal portion encompasses the remainder of the
2002    local matrix (a rectangular submatrix).
2003 
2004    The user can specify preallocated storage for the diagonal part of
2005    the local submatrix with either d_nz or d_nnz (not both).  Set
2006    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2007    memory allocation.  Likewise, specify preallocated storage for the
2008    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2009 
2010    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2011    the figure below we depict these three local rows and all columns (0-11).
2012 
2013 .vb
2014            0 1 2 3 4 5 6 7 8 9 10 11
2015           -------------------
2016    row 3  |  o o o d d d o o o o o o
2017    row 4  |  o o o d d d o o o o o o
2018    row 5  |  o o o d d d o o o o o o
2019           -------------------
2020 .ve
2021 
2022    Thus, any entries in the d locations are stored in the d (diagonal)
2023    submatrix, and any entries in the o locations are stored in the
2024    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2025    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2026 
2027    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2028    plus the diagonal part of the d matrix,
2029    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2030    In general, for PDE problems in which most nonzeros are near the diagonal,
2031    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2032    or you will get TERRIBLE performance; see the users' manual chapter on
2033    matrices.
2034 
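        Example (a minimal sketch; the communicator, sizes, block size, and preallocation
        values are illustrative):
     .vb
        Mat A;
        MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                          5,PETSC_NULL,2,PETSC_NULL,&A);
     .ve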
2035    Level: intermediate
2036 
2037 .keywords: matrix, block, aij, compressed row, sparse, parallel
2038 
2039 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2040 @*/
2041 
2042 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2043 {
2044   PetscErrorCode ierr;
2045   int size;
2046 
2047   PetscFunctionBegin;
2048   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2049   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2050   if (size > 1) {
2051     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2052     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2053   } else {
2054     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2055     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2056   }
2057   PetscFunctionReturn(0);
2058 }
2059 
2060 
2061 #undef __FUNCT__
2062 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2063 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2064 {
2065   Mat          mat;
2066   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2067   PetscErrorCode ierr;
2068   int len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs;
2069   PetscScalar  *array;
2070 
2071   PetscFunctionBegin;
2072   *newmat       = 0;
2073   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2074   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2075   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2076 
2077   mat->factor       = matin->factor;
2078   mat->preallocated = PETSC_TRUE;
2079   mat->assembled    = PETSC_TRUE;
2080   mat->insertmode   = NOT_SET_VALUES;
2081 
2082   a = (Mat_MPISBAIJ*)mat->data;
2083   a->bs  = oldmat->bs;
2084   a->bs2 = oldmat->bs2;
2085   a->mbs = oldmat->mbs;
2086   a->nbs = oldmat->nbs;
2087   a->Mbs = oldmat->Mbs;
2088   a->Nbs = oldmat->Nbs;
2089 
2090   a->rstart       = oldmat->rstart;
2091   a->rend         = oldmat->rend;
2092   a->cstart       = oldmat->cstart;
2093   a->cend         = oldmat->cend;
2094   a->size         = oldmat->size;
2095   a->rank         = oldmat->rank;
2096   a->donotstash   = oldmat->donotstash;
2097   a->roworiented  = oldmat->roworiented;
2098   a->rowindices   = 0;
2099   a->rowvalues    = 0;
2100   a->getrowactive = PETSC_FALSE;
2101   a->barray       = 0;
2102   a->rstart_bs    = oldmat->rstart_bs;
2103   a->rend_bs      = oldmat->rend_bs;
2104   a->cstart_bs    = oldmat->cstart_bs;
2105   a->cend_bs      = oldmat->cend_bs;
2106 
2107   /* hash table stuff */
2108   a->ht           = 0;
2109   a->hd           = 0;
2110   a->ht_size      = 0;
2111   a->ht_flag      = oldmat->ht_flag;
2112   a->ht_fact      = oldmat->ht_fact;
2113   a->ht_total_ct  = 0;
2114   a->ht_insert_ct = 0;
2115 
2116   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2117   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2118   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2119   if (oldmat->colmap) {
2120 #if defined (PETSC_USE_CTABLE)
2121     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2122 #else
2123     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2124     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2125     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2126 #endif
2127   } else a->colmap = 0;
2128 
2129   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2130     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2131     PetscLogObjectMemory(mat,len*sizeof(int));
2132     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2133   } else a->garray = 0;
2134 
2135   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2136   PetscLogObjectParent(mat,a->lvec);
2137   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2138   PetscLogObjectParent(mat,a->Mvctx);
2139 
2140   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2141   PetscLogObjectParent(mat,a->slvec0);
2142   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2143   PetscLogObjectParent(mat,a->slvec1);
2144 
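       /* slvec1a/slvec1b and slvec0b are not independent vectors: they are windows into the
          storage of slvec1 and slvec0 (the first bs*mbs entries hold the owned part, the
          remainder the ghost part), created with VecCreateSeqWithArray() so that no extra
          storage is allocated. */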
2145   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2146   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2147   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2148   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2149   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2150   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2151   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2152   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2153   PetscLogObjectParent(mat,a->slvec0);
2154   PetscLogObjectParent(mat,a->slvec1);
2155   PetscLogObjectParent(mat,a->slvec0b);
2156   PetscLogObjectParent(mat,a->slvec1a);
2157   PetscLogObjectParent(mat,a->slvec1b);
2158 
2159   /* VecScatterCopy() is not yet written for sMvctx; instead share the original scatter by taking a reference */
2160   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2161   a->sMvctx = oldmat->sMvctx;
2162   PetscLogObjectParent(mat,a->sMvctx);
2163 
2164   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2165   PetscLogObjectParent(mat,a->A);
2166   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2167   PetscLogObjectParent(mat,a->B);
2168   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2169   *newmat = mat;
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 #include "petscsys.h"
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatLoad_MPISBAIJ"
2177 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2178 {
2179   Mat          A;
2180   PetscErrorCode ierr;
2181   int          i,nz,j,rstart,rend,fd;
2182   PetscScalar  *vals,*buf;
2183   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2184   MPI_Status   status;
2185   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2186   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2187   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2188   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2189   int          dcount,kmax,k,nzcount,tmp;
2190 
2191   PetscFunctionBegin;
2192   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2193 
2194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2195   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2196   if (!rank) {
2197     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2198     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2199     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2200     if (header[3] < 0) {
2201       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2202     }
2203   }
2204 
2205   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2206   M = header[1]; N = header[2];
2207 
2208   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2209 
2210   /*
2211      This code adds extra rows to make sure the number of rows is
2212      divisible by the blocksize
2213   */
2214   Mbs        = M/bs;
2215   extra_rows = bs - M + bs*(Mbs);
2216   if (extra_rows == bs) extra_rows = 0;
2217   else                  Mbs++;
2218   if (extra_rows &&!rank) {
2219     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2220   }
2221 
2222   /* determine ownership of all rows */
2223   mbs        = Mbs/size + ((Mbs % size) > rank);
2224   m          = mbs*bs;
2225   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2226   browners   = rowners + size + 1;
2227   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2228   rowners[0] = 0;
2229   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2230   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2231   rstart = rowners[rank];
2232   rend   = rowners[rank+1];
2233 
2234   /* distribute row lengths to all processors */
2235   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2236   if (!rank) {
2237     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2238     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2239     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2240     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2241     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2242     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2243     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2244   } else {
2245     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2246   }
2247 
2248   if (!rank) {   /* procs[0] */
2249     /* calculate the number of nonzeros on each processor */
2250     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2251     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2252     for (i=0; i<size; i++) {
2253       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2254         procsnz[i] += rowlengths[j];
2255       }
2256     }
2257     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2258 
2259     /* determine max buffer needed and allocate it */
2260     maxnz = 0;
2261     for (i=0; i<size; i++) {
2262       maxnz = PetscMax(maxnz,procsnz[i]);
2263     }
2264     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2265 
2266     /* read in my part of the matrix column indices  */
2267     nz     = procsnz[0];
2268     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2269     mycols = ibuf;
2270     if (size == 1)  nz -= extra_rows;
2271     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2272     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2273 
2274     /* read in the portions owned by the other processors (except the last) and ship them off */
2275     for (i=1; i<size-1; i++) {
2276       nz   = procsnz[i];
2277       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2278       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2279     }
2280     /* read in the portion for the last proc; the padded extra rows are appended locally */
2281     if (size != 1) {
2282       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2283       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2284       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2285       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2286     }
2287     ierr = PetscFree(cols);CHKERRQ(ierr);
2288   } else {  /* procs[i], i>0 */
2289     /* determine buffer space needed for message */
2290     nz = 0;
2291     for (i=0; i<m; i++) {
2292       nz += locrowlens[i];
2293     }
2294     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2295     mycols = ibuf;
2296     /* receive message of column indices*/
2297     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2298     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2299     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2300   }
2301 
2302   /* loop over local rows, determining number of off diagonal entries */
2303   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2304   odlens   = dlens + (rend-rstart);
2305   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2306   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2307   masked1  = mask    + Mbs;
2308   masked2  = masked1 + Mbs;
2309   rowcount = 0; nzcount = 0;
2310   for (i=0; i<mbs; i++) {
2311     dcount  = 0;
2312     odcount = 0;
2313     for (j=0; j<bs; j++) {
2314       kmax = locrowlens[rowcount];
2315       for (k=0; k<kmax; k++) {
2316         tmp = mycols[nzcount++]/bs; /* block col. index */
2317         if (!mask[tmp]) {
2318           mask[tmp] = 1;
2319           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2320           else masked1[dcount++] = tmp; /* entry in diag portion */
2321         }
2322       }
2323       rowcount++;
2324     }
2325 
2326     dlens[i]  = dcount;  /* d_nzz[i] */
2327     odlens[i] = odcount; /* o_nzz[i] */
2328 
2329     /* zero out the mask elements we set */
2330     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2331     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2332   }
2333 
2334   /* create our matrix */
2335   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2336   ierr = MatSetType(A,type);CHKERRQ(ierr);
2337   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2338   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2339 
2340   if (!rank) {
2341     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2342     /* read in my part of the matrix numerical values  */
2343     nz = procsnz[0];
2344     vals = buf;
2345     mycols = ibuf;
2346     if (size == 1)  nz -= extra_rows;
2347     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2348     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2349 
2350     /* insert into matrix */
2351     jj      = rstart*bs;
2352     for (i=0; i<m; i++) {
2353       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2354       mycols += locrowlens[i];
2355       vals   += locrowlens[i];
2356       jj++;
2357     }
2358 
2359     /* read in other processors (except the last one) and ship out */
2360     for (i=1; i<size-1; i++) {
2361       nz   = procsnz[i];
2362       vals = buf;
2363       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2364       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2365     }
2366     /* the last proc */
2367     if (size != 1){
2368       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2369       vals = buf;
2370       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2371       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2372       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2373     }
2374     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2375 
2376   } else {
2377     /* receive numeric values */
2378     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2379 
2380     /* receive message of values*/
2381     vals   = buf;
2382     mycols = ibuf;
2383     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2384     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2385     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2386 
2387     /* insert into matrix */
2388     jj      = rstart*bs;
2389     for (i=0; i<m; i++) {
2390       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2391       mycols += locrowlens[i];
2392       vals   += locrowlens[i];
2393       jj++;
2394     }
2395   }
2396 
2397   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2398   ierr = PetscFree(buf);CHKERRQ(ierr);
2399   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2400   ierr = PetscFree(rowners);CHKERRQ(ierr);
2401   ierr = PetscFree(dlens);CHKERRQ(ierr);
2402   ierr = PetscFree(mask);CHKERRQ(ierr);
2403   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2404   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2405   *newmat = A;
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 #undef __FUNCT__
2410 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2411 /*@
2412    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2413 
2414    Input Parameters:
2415 .  mat  - the matrix
2416 .  fact - factor
2417 
2418    Collective on Mat
2419 
2420    Level: advanced
2421 
2422   Notes:
2423    This can also be set by the command line option -mat_use_hash_table <fact>. Not yet implemented for the SBAIJ format; currently generates an error.
2424 
2425 .keywords: matrix, hashtable, factor, HT
2426 
2427 .seealso: MatSetOption()
2428 @*/
2429 PetscErrorCode MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2430 {
2431   PetscFunctionBegin;
2432   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
2433   /* PetscFunctionReturn(0); */
2434 }
2435 
2436 #undef __FUNCT__
2437 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2438 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2439 {
2440   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2441   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2442   PetscReal    atmp;
2443   PetscReal    *work,*svalues,*rvalues;
2444   PetscErrorCode ierr;
2445   int i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2446   int          rank,size,*rowners_bs,dest,count,source;
2447   PetscScalar  *va;
2448   MatScalar    *ba;
2449   MPI_Status   stat;
2450 
2451   PetscFunctionBegin;
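       /* Only the upper triangle is stored, so entries of the off-diagonal block B also
          contribute to row maxima of rows owned by higher-ranked processes: accumulate
          those contributions in work[] and send each slice to its owning process. */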
2452   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2453   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2454 
2455   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2456   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2457 
2458   bs   = a->bs;
2459   mbs  = a->mbs;
2460   Mbs  = a->Mbs;
2461   ba   = b->a;
2462   bi   = b->i;
2463   bj   = b->j;
2468 
2469   /* find ownerships */
2470   rowners_bs = a->rowners_bs;
2476 
2477   /* each proc creates an array to be distributed */
2478   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2479   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2480 
2481   /* row_max for B */
2482   if (rank != size-1){
2483     for (i=0; i<mbs; i++) {
2484       ncols = bi[1] - bi[0]; bi++;
2485       brow  = bs*i;
2486       for (j=0; j<ncols; j++){
2487         bcol = bs*(*bj);
2488         for (kcol=0; kcol<bs; kcol++){
2489           col = bcol + kcol;                 /* local col index */
2490           col += rowners_bs[rank+1];      /* global col index */
2492           for (krow=0; krow<bs; krow++){
2493             atmp = PetscAbsScalar(*ba); ba++;
2494             row = brow + krow;    /* local row index */
2496             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2497             if (work[col] < atmp) work[col] = atmp;
2498           }
2499         }
2500         bj++;
2501       }
2502     }
2508 
2509     /* send values to its owners */
2510     for (dest=rank+1; dest<size; dest++){
2511       svalues = work + rowners_bs[dest];
2512       count   = rowners_bs[dest+1]-rowners_bs[dest];
2513       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2518     }
2519   }
2520 
2521   /* receive values */
2522   if (rank){
2523     rvalues = work;
2524     count   = rowners_bs[rank+1]-rowners_bs[rank];
2525     for (source=0; source<rank; source++){
2526       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2527       /* process values */
2528       for (i=0; i<count; i++){
2529         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2530       }
2535     }
2536   }
2537 
2538   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2539   ierr = PetscFree(work);CHKERRQ(ierr);
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 #undef __FUNCT__
2544 #define __FUNCT__ "MatRelax_MPISBAIJ"
2545 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2546 {
2547   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2548   PetscErrorCode ierr;
2549   int mbs=mat->mbs,bs=mat->bs;
2550   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2551   Vec            bb1;
2552 
2553   PetscFunctionBegin;
2554   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2555   if (bs > 1)
2556     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2557 
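       /* Each outer iteration forms bb1 = bb - (off-process part of A)*xx: slvec0 = [-xx; -B^T*xx]
          is scatter-added into slvec1 = [bb; 0] to fold in the lower triangular contributions,
          then a multadd with B folds in the upper triangular contributions. A local symmetric
          sweep on the diagonal block with right-hand side bb1 finishes the iteration. */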
2558   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2559     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2560       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2561       its--;
2562     }
2563 
2564     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2565     while (its--){
2566 
2567       /* lower triangular part: slvec0b = - B^T*xx */
2568       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2569 
2570       /* copy xx into slvec0a */
2571       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2572       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2573       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2574       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2575 
2576       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2577 
2578       /* copy bb into slvec1a */
2579       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2580       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2581       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2582       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2583 
2584       /* set slvec1b = 0 */
2585       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2586 
2587       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2588       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2589       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2590       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2591 
2592       /* upper triangular part: bb1 = bb1 - B*x */
2593       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2594 
2595       /* local diagonal sweep */
2596       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2597     }
2598     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2599   } else {
2600     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2601   }
2602   PetscFunctionReturn(0);
2603 }
2604 
2605 #undef __FUNCT__
2606 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2607 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2608 {
2609   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2610   PetscErrorCode ierr;
2611   PetscScalar    mone=-1.0;
2612   Vec            lvec1,bb1;
2613 
2614   PetscFunctionBegin;
2615   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2616   if (mat->bs > 1)
2617     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2618 
2619   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2620     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2621       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2622       its--;
2623     }
2624 
2625     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2626     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2627     while (its--){
2628       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2629 
2630       /* lower diagonal part: bb1 = bb - B^T*xx */
2631       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2632       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2633 
2634       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2635       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2636       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2637 
2638       /* upper diagonal part: bb1 = bb1 - B*x */
2639       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2640       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2641 
2642       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2643 
2644       /* diagonal sweep */
2645       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2646     }
2647     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2648     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2649   } else {
2650     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2651   }
2652   PetscFunctionReturn(0);
2653 }
2654 
2655