/* src/mat/impls/sbaij/mpi/mpisbaij.c (revision 6849ba73f22fecb8f92ef896a42e4e8bd4cd6965) */

#include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,int,IS[],int);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single-precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single-precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
#endif
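
/* A minimal sketch of what the mapping above means (illustrative only):
   with PETSC_USE_MAT_SINGLE defined, a call such as

       PetscScalar v[] = {1.0, 2.0};                    double precision
       MatSetValues_MPISBAIJ(mat,1,&i,2,cols,v,ADD_VALUES);

   first copies v[] into a MatScalar (single-precision) buffer and then calls
   MatSetValues_MPISBAIJ_MatScalar(); without it, the macros above alias the
   ..._MatScalar names to the regular functions and no copy is made. */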

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
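
/* Illustrative use of the store/retrieve pair above: when the nonzero pattern
   is fixed across iterations, the values can be saved once and restored before
   each reassembly,

       ierr = MatStoreValues(mat);CHKERRQ(ierr);
       ...  modify matrix entries, reassemble  ...
       ierr = MatRetrieveValues(mat);CHKERRQ(ierr);

   both of which dispatch to the MPISBAIJ routines above, which in turn simply
   forward to the sequential diagonal (A) and off-diagonal (B) blocks. */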


#define CHUNKSIZE  10

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into the matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (int*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
#ifndef MatSetValues_SeqSBAIJ_B_Private
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into the matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (int*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif
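
/* Both macros above address entry (row,col) within block _i of a block row by

       bap = ap + bs2*_i + bs*cidx + ridx;

   i.e. the bs-by-bs blocks are stored contiguously, each in column-major
   order. A small worked example (illustrative): with bs = 2 (so bs2 = 4),
   row = 5 and col = 2 give brow = 2, bcol = 1, ridx = 1, cidx = 0, so the
   value lands at offset 4*_i + 2*0 + 1 within block row 2. */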

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  int i,N = m*n;
  MatScalar    *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  int i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
PetscErrorCode MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
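/* For example (illustrative), with bs=2 the entry (5,2) lies in block (2,1);
   since 2 > 1 it is below the block diagonal and is silently dropped, while
   its transpose location (2,5), which lies in block (1,2), would be kept. */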
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar    value;
  PetscTruth   roworiented = baij->roworiented;
  PetscErrorCode ierr;
  int i,j,row,col;
  int          rstart_orig=baij->rstart_bs;
  int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int          cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some variables required in the macro */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa=a->a;

  Mat          B = baij->B;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
  int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar    *ba=b->a;

  int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int          low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar    *ap,*bap;

  /* for stash */
  int          n_loc, *in_loc=0;
  MatScalar    *v_loc=0;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor's entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off-processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }

  if (!baij->donotstash) {
    ierr = PetscFree(in_loc);CHKERRQ(ierr);
    ierr = PetscFree(v_loc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  int i,j,ii,jj,row,col,rstart=baij->rstart;
  int             rend=baij->rend,cstart=baij->cstart,stepval;
  int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
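/* Illustrative arithmetic for the macro above: HASH(97,12345,tmp) computes
   tmp = 12345*0.6180339887 = 7629.6296..., keeps the fractional part
   0.6296... and scales it by the table size, (int)(97*0.6296...) = 61.
   This is the classical multiplicative (golden-ratio) hash. */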
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  int          bs=baij->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
  /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
  PetscErrorCode ierr;
  PetscReal  sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,&lnorm2[0]);CHKERRQ(ierr);
      lnorm2[0] *= lnorm2[0];                  /* square of norm(A) */
      ierr = MatNorm(baij->B,type,&lnorm2[1]);CHKERRQ(ierr);
      lnorm2[1] *= lnorm2[1];                  /* square of norm(B) */
      /*
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
      */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      /*
      PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
      PetscSynchronizedFlush(mat->comm); */

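      /* Each off-diagonal block is stored only once but appears twice (as B
         and B^T) in the symmetric matrix, hence the factor of 2:
             ||mat||_F^2 = sum_p ( ||A_p||_F^2 + 2*||B_p||_F^2 )  */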
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates and sets up the hash table.
  The table is created only once; if new entries need
  to be added to the matrix then the hash table has to
  be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  int nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
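
/* The usual (illustrative) calling sequence that drives the assembly pair
   defined here, executed collectively on every process:

       ierr = MatSetValues(mat,...);CHKERRQ(ierr);         possibly off-process entries
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatAssemblyBegin() above ships the stashed off-process entries;
   MatAssemblyEnd() below receives them, inserts them locally, and sets up
   the scatter contexts. */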

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  int         i,j,rstart,ncols,n,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      /*
      PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
      PetscSynchronizedFlush(mat->comm);
      */
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the row-oriented flags and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  int bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw       draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ *Bloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Should this be the same type as mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  int nt,mbs=a->mbs,bs=a->bs;
  PetscScalar *x,*from,zero=0.0;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
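
/* In block form the product computed above is (illustrative notation)

       y = A_d x_loc + B x_ghost + (B^T x_loc)_scattered ,

   where A_d is the symmetric diagonal block and B holds the strictly upper
   off-diagonal blocks; the single scatter on slvec0/slvec1 both gathers the
   ghost values of x into slvec1b and adds the B^T contributions from slvec0b
   into the remote owners' slvec1a, so only one communication phase is needed. */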

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  int nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  int mbs=a->mbs,bs=a->bs;
  PetscScalar  *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPISBAIJ"
PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  int            bs = mat->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int            *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
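
/* Illustrative use of the row access pair (only locally owned rows may be
   queried, per the check above):

       int nz,*cols; PetscScalar *vals;
       ierr = MatGetRow(mat,grow,&nz,&cols,&vals);CHKERRQ(ierr);
       ...  read the nz column indices/values of global row grow  ...
       ierr = MatRestoreRow(mat,grow,&nz,&cols,&vals);CHKERRQ(ierr);
*/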

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
PetscErrorCode MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
  Mat         A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal   isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = (PetscReal)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",flag);
  }
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
    SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1375 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1376 {
1377   PetscErrorCode ierr;
1378   PetscFunctionBegin;
1379   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1385 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1386 {
1387   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1388   Mat         a = baij->A,b = baij->B;
1389   PetscErrorCode ierr;
1390   int s1,s2,s3;
1391 
1392   PetscFunctionBegin;
1393   if (ll != rr) {
1394     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1395   }
1396   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1397   if (rr) {
1398     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1399     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1400     /* Overlap communication with computation. */
1401     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1403     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1404     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1405     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1407     /* scale the diagonal block */
1408     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1409 
1411     /* do the scatter end, then right scale the off-diagonal block */
1412     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1413     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1414   }
1415 
1416   PetscFunctionReturn(0);
1417 }
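
/*
   Usage sketch (illustrative only): the symmetric format requires the same
   vector on both sides, which yields the symmetric scaling D*A*D.

     Vec d;
     ierr = MatDiagonalScale(A,d,d);CHKERRQ(ierr);
*/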
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1421 PetscErrorCode MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1422 {
1423   PetscFunctionBegin;
1424   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1429 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1430 {
1431   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1432   MPI_Comm    comm = A->comm;
1433   static int  called = 0;
1434   PetscErrorCode ierr;
1435 
1436   PetscFunctionBegin;
1437   if (!a->rank) {
1438     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1439   }
1440   if (called) {PetscFunctionReturn(0);} else called = 1;
1441   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1442   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1448 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1449 {
1450   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1451   PetscErrorCode ierr;
1452 
1453   PetscFunctionBegin;
1454   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1459 
1460 #undef __FUNCT__
1461 #define __FUNCT__ "MatEqual_MPISBAIJ"
1462 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1463 {
1464   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1465   Mat         a,b,c,d;
1466   PetscTruth  flg;
1467   PetscErrorCode ierr;
1468 
1469   PetscFunctionBegin;
1470   a = matA->A; b = matA->B;
1471   c = matB->A; d = matB->B;
1472 
1473   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1474   if (flg == PETSC_TRUE) {
1475     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1476   }
1477   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 #undef __FUNCT__
1482 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1483 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1484 {
1485   PetscErrorCode ierr;
1486 
1487   PetscFunctionBegin;
1488   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1489   PetscFunctionReturn(0);
1490 }
1491 
1492 #undef __FUNCT__
1493 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1494 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1495 {
1496   PetscErrorCode ierr;
1497   int        i;
1498   PetscTruth flg;
1499 
1500   PetscFunctionBegin;
1501   for (i=0; i<n; i++) {
1502     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1503     if (!flg) {
1504       SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices");
1505     }
1506   }
1507   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1508   PetscFunctionReturn(0);
1509 }
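
/*
   Usage sketch (illustrative only): the row and column index sets must be
   equal, so only symmetric (principal) submatrices can be extracted.

     IS  is;
     Mat *submat;
     ierr = MatGetSubMatrices(A,1,&is,&is,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
*/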
1510 
1511 
1512 /* -------------------------------------------------------------------*/
1513 static struct _MatOps MatOps_Values = {
1514        MatSetValues_MPISBAIJ,
1515        MatGetRow_MPISBAIJ,
1516        MatRestoreRow_MPISBAIJ,
1517        MatMult_MPISBAIJ,
1518 /* 4*/ MatMultAdd_MPISBAIJ,
1519        MatMultTranspose_MPISBAIJ,
1520        MatMultTransposeAdd_MPISBAIJ,
1521        0,
1522        0,
1523        0,
1524 /*10*/ 0,
1525        0,
1526        0,
1527        MatRelax_MPISBAIJ,
1528        MatTranspose_MPISBAIJ,
1529 /*15*/ MatGetInfo_MPISBAIJ,
1530        MatEqual_MPISBAIJ,
1531        MatGetDiagonal_MPISBAIJ,
1532        MatDiagonalScale_MPISBAIJ,
1533        MatNorm_MPISBAIJ,
1534 /*20*/ MatAssemblyBegin_MPISBAIJ,
1535        MatAssemblyEnd_MPISBAIJ,
1536        0,
1537        MatSetOption_MPISBAIJ,
1538        MatZeroEntries_MPISBAIJ,
1539 /*25*/ MatZeroRows_MPISBAIJ,
1540        0,
1541        0,
1542        0,
1543        0,
1544 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1545        0,
1546        0,
1547        0,
1548        0,
1549 /*35*/ MatDuplicate_MPISBAIJ,
1550        0,
1551        0,
1552        0,
1553        0,
1554 /*40*/ 0,
1555        MatGetSubMatrices_MPISBAIJ,
1556        MatIncreaseOverlap_MPISBAIJ,
1557        MatGetValues_MPISBAIJ,
1558        0,
1559 /*45*/ MatPrintHelp_MPISBAIJ,
1560        MatScale_MPISBAIJ,
1561        0,
1562        0,
1563        0,
1564 /*50*/ MatGetBlockSize_MPISBAIJ,
1565        0,
1566        0,
1567        0,
1568        0,
1569 /*55*/ 0,
1570        0,
1571        MatSetUnfactored_MPISBAIJ,
1572        0,
1573        MatSetValuesBlocked_MPISBAIJ,
1574 /*60*/ 0,
1575        0,
1576        0,
1577        MatGetPetscMaps_Petsc,
1578        0,
1579 /*65*/ 0,
1580        0,
1581        0,
1582        0,
1583        0,
1584 /*70*/ MatGetRowMax_MPISBAIJ,
1585        0,
1586        0,
1587        0,
1588        0,
1589 /*75*/ 0,
1590        0,
1591        0,
1592        0,
1593        0,
1594 /*80*/ 0,
1595        0,
1596        0,
1597        0,
1598 /*85*/ MatLoad_MPISBAIJ
1599 };
1600 
1601 
1602 EXTERN_C_BEGIN
1603 #undef __FUNCT__
1604 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1605 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1606 {
1607   PetscFunctionBegin;
1608   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1609   *iscopy = PETSC_FALSE;
1610   PetscFunctionReturn(0);
1611 }
1612 EXTERN_C_END
1613 
1614 EXTERN_C_BEGIN
1615 #undef __FUNCT__
1616 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1617 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1618 {
1619   Mat_MPISBAIJ *b;
1620   PetscErrorCode ierr;
1621   int i,mbs,Mbs;
1622 
1623   PetscFunctionBegin;
1624   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1625 
1626   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1627   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1628   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1629   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1630   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1631   if (d_nnz) {
1632     for (i=0; i<B->m/bs; i++) {
1633       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1634     }
1635   }
1636   if (o_nnz) {
1637     for (i=0; i<B->m/bs; i++) {
1638       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1639     }
1640   }
1641   B->preallocated = PETSC_TRUE;
1642   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1643   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1644   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1645   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1646 
1647   b   = (Mat_MPISBAIJ*)B->data;
1648   mbs = B->m/bs;
1649   Mbs = B->M/bs;
1650   if (mbs*bs != B->m) {
1651     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by block size %d",B->m,bs);
1652   }
1653 
1654   b->bs  = bs;
1655   b->bs2 = bs*bs;
1656   b->mbs = mbs;
1657   b->nbs = mbs;
1658   b->Mbs = Mbs;
1659   b->Nbs = Mbs;
1660 
1661   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1662   b->rowners[0]    = 0;
1663   for (i=2; i<=b->size; i++) {
1664     b->rowners[i] += b->rowners[i-1];
1665   }
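  /* Illustrative example: with 3 processes and local block counts {4,3,3},
     rowners becomes {0,4,7,10}, so process 1 owns block rows 4..6. */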
1666   b->rstart    = b->rowners[b->rank];
1667   b->rend      = b->rowners[b->rank+1];
1668   b->cstart    = b->rstart;
1669   b->cend      = b->rend;
1670   for (i=0; i<=b->size; i++) {
1671     b->rowners_bs[i] = b->rowners[i]*bs;
1672   }
1673   b->rstart_bs = b-> rstart*bs;
1674   b->rend_bs   = b->rend*bs;
1675 
1676   b->cstart_bs = b->cstart*bs;
1677   b->cend_bs   = b->cend*bs;
1678 
1679   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1680   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1681   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1682   PetscLogObjectParent(B,b->A);
1683 
1684   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1685   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1686   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1687   PetscLogObjectParent(B,b->B);
1688 
1689   /* build cache for off array entries formed */
1690   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1691 
1692   PetscFunctionReturn(0);
1693 }
1694 EXTERN_C_END
1695 
1696 /*MC
1697    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1698    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1699 
1700    Options Database Keys:
1701 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1702 
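   Example (a sketch, assuming A has already been created with MatCreate()):
.vb
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve
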
1703   Level: beginner
1704 
1705 .seealso: MatCreateMPISBAIJ
1706 M*/
1707 
1708 EXTERN_C_BEGIN
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatCreate_MPISBAIJ"
1711 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1712 {
1713   Mat_MPISBAIJ *b;
1714   PetscErrorCode ierr;
1715   PetscTruth   flg;
1716 
1717   PetscFunctionBegin;
1718 
1719   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1720   B->data = (void*)b;
1721   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1722   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1723 
1724   B->ops->destroy    = MatDestroy_MPISBAIJ;
1725   B->ops->view       = MatView_MPISBAIJ;
1726   B->mapping    = 0;
1727   B->factor     = 0;
1728   B->assembled  = PETSC_FALSE;
1729 
1730   B->insertmode = NOT_SET_VALUES;
1731   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1732   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1733 
1734   /* build local table of row and column ownerships */
1735   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1736   b->cowners    = b->rowners + b->size + 2;
1737   b->rowners_bs = b->cowners + b->size + 2;
1738   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1739 
1740   /* build cache for off array entries formed */
1741   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1742   b->donotstash  = PETSC_FALSE;
1743   b->colmap      = PETSC_NULL;
1744   b->garray      = PETSC_NULL;
1745   b->roworiented = PETSC_TRUE;
1746 
1747 #if defined(PETSC_USE_MAT_SINGLE)
1748   /* stuff for MatSetValues_XXX in single precision */
1749   b->setvalueslen     = 0;
1750   b->setvaluescopy    = PETSC_NULL;
1751 #endif
1752 
1753   /* stuff used in block assembly */
1754   b->barray       = 0;
1755 
1756   /* stuff used for matrix vector multiply */
1757   b->lvec         = 0;
1758   b->Mvctx        = 0;
1759   b->slvec0       = 0;
1760   b->slvec0b      = 0;
1761   b->slvec1       = 0;
1762   b->slvec1a      = 0;
1763   b->slvec1b      = 0;
1764   b->sMvctx       = 0;
1765 
1766   /* stuff for MatGetRow() */
1767   b->rowindices   = 0;
1768   b->rowvalues    = 0;
1769   b->getrowactive = PETSC_FALSE;
1770 
1771   /* hash table stuff */
1772   b->ht           = 0;
1773   b->hd           = 0;
1774   b->ht_size      = 0;
1775   b->ht_flag      = PETSC_FALSE;
1776   b->ht_fact      = 0;
1777   b->ht_total_ct  = 0;
1778   b->ht_insert_ct = 0;
1779 
1780   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1781   if (flg) {
1782     PetscReal fact = 1.39;
1783     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1784     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1785     if (fact <= 1.0) fact = 1.39;
1786     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1787     PetscLogInfo(0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact);
1788   }
1789   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1790                                      "MatStoreValues_MPISBAIJ",
1791                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1792   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1793                                      "MatRetrieveValues_MPISBAIJ",
1794                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1795   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1796                                      "MatGetDiagonalBlock_MPISBAIJ",
1797                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1798   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1799                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1800                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1801   B->symmetric                  = PETSC_TRUE;
1802   B->structurally_symmetric     = PETSC_TRUE;
1803   B->symmetric_set              = PETSC_TRUE;
1804   B->structurally_symmetric_set = PETSC_TRUE;
1805   PetscFunctionReturn(0);
1806 }
1807 EXTERN_C_END
1808 
1809 /*MC
1810    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1811 
1812    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1813    and MATMPISBAIJ otherwise.
1814 
1815    Options Database Keys:
1816 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1817 
1818   Level: beginner
1819 
1820 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1821 M*/
1822 
1823 EXTERN_C_BEGIN
1824 #undef __FUNCT__
1825 #define __FUNCT__ "MatCreate_SBAIJ"
1826 PetscErrorCode MatCreate_SBAIJ(Mat A)
1827 {
1828   PetscErrorCode ierr;
1829   int size;
1830 
1831   PetscFunctionBegin;
1832   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1833   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1834   if (size == 1) {
1835     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1836   } else {
1837     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1838   }
1839   PetscFunctionReturn(0);
1840 }
1841 EXTERN_C_END
1842 
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1845 /*@C
1846    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1847    the user should preallocate the matrix storage by setting the parameters
1848    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1849    performance can be increased by more than a factor of 50.
1850 
1851    Collective on Mat
1852 
1853    Input Parameters:
1854 +  A - the matrix
1855 .  bs   - size of block
1856 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1857            submatrix  (same for all local rows)
1858 .  d_nnz - array containing the number of block nonzeros in the various block rows
1859            in the upper triangular and diagonal part of the diagonal portion of the local
1860            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave
1861            room for the diagonal entry even if it is zero.
1862 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1863            submatrix (same for all local rows).
1864 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1865            off-diagonal portion of the local submatrix (possibly different for
1866            each block row) or PETSC_NULL.
1867 
1868 
1869    Options Database Keys:
1870 +   -mat_no_unroll - uses code that does not unroll the loops in the
1871                      block calculations (much slower)
1872 -   -mat_block_size - size of the blocks to use
1873 
1874    Notes:
1875 
1876    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1877    then it must be used on all processors that share the object for that argument.
1878 
1879    Storage Information:
1880    For a square global matrix we define each processor's diagonal portion
1881    to be its local rows and the corresponding columns (a square submatrix);
1882    each processor's off-diagonal portion encompasses the remainder of the
1883    local matrix (a rectangular submatrix).
1884 
1885    The user can specify preallocated storage for the diagonal part of
1886    the local submatrix with either d_nz or d_nnz (not both).  Set
1887    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1888    memory allocation.  Likewise, specify preallocated storage for the
1889    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1890 
1891    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1892    the figure below we depict these three local rows and all columns (0-11).
1893 
1894 .vb
1895            0 1 2 3 4 5 6 7 8 9 10 11
1896           -------------------
1897    row 3  |  o o o d d d o o o o o o
1898    row 4  |  o o o d d d o o o o o o
1899    row 5  |  o o o d d d o o o o o o
1900           -------------------
1901 .ve
1902 
1903    Thus, any entries in the d locations are stored in the d (diagonal)
1904    submatrix, and any entries in the o locations are stored in the
1905    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1906    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1907 
1908    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1909    plus the diagonal part of the d matrix,
1910    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1911    In general, for PDE problems in which most nonzeros are near the diagonal,
1912    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1913    or you will get TERRIBLE performance; see the users' manual chapter on
1914    matrices.
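
   For example, with bs=1 and the three rows pictured above owned by this
   process (assuming, for illustration, that every marked position is a stored
   upper-triangular nonzero), the counts would be
.vb
   int d_nnz[] = {3,2,1};    row 3 keeps columns 3,4,5; row 4 keeps 4,5; row 5 keeps 5
   int o_nnz[] = {6,6,6};    only columns 6-11 are stored; 0-2 lie in the lower triangle
   MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);
.ve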
1915 
1916    Level: intermediate
1917 
1918 .keywords: matrix, block, aij, compressed row, sparse, parallel
1919 
1920 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1921 @*/
1922 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1923 {
1924   PetscErrorCode ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1925 
1926   PetscFunctionBegin;
1927   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1928   if (f) {
1929     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1930   }
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 #undef __FUNCT__
1935 #define __FUNCT__ "MatCreateMPISBAIJ"
1936 /*@C
1937    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1938    (block compressed row).  For good matrix assembly performance
1939    the user should preallocate the matrix storage by setting the parameters
1940    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1941    performance can be increased by more than a factor of 50.
1942 
1943    Collective on MPI_Comm
1944 
1945    Input Parameters:
1946 +  comm - MPI communicator
1947 .  bs   - size of block
1948 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1949            This value should be the same as the local size used in creating the
1950            y vector for the matrix-vector product y = Ax.
1951 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1952            This value should be the same as the local size used in creating the
1953            x vector for the matrix-vector product y = Ax.
1954 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1955 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1956 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1957            submatrix  (same for all local rows)
1958 .  d_nnz - array containing the number of block nonzeros in the various block rows
1959            in the upper triangular portion of the diagonal portion of the local submatrix
1960            (possibly different for each block row) or PETSC_NULL.
1961            You must leave room for the diagonal entry even if it is zero.
1962 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1963            submatrix (same for all local rows).
1964 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1965            off-diagonal portion of the local submatrix (possibly different for
1966            each block row) or PETSC_NULL.
1967 
1968    Output Parameter:
1969 .  A - the matrix
1970 
1971    Options Database Keys:
1972 +   -mat_no_unroll - uses code that does not unroll the loops in the
1973                      block calculations (much slower)
1974 .   -mat_block_size - size of the blocks to use
1975 -   -mat_mpi - use the parallel matrix data structures even on one processor
1976                (defaults to using the SEQSBAIJ format on one processor)
1977 
1978    Notes:
1979    The user MUST specify either the local or global matrix dimensions
1980    (possibly both).
1981 
1982    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1983    then it must be used on all processors that share the object for that argument.
1984 
1985    Storage Information:
1986    For a square global matrix we define each processor's diagonal portion
1987    to be its local rows and the corresponding columns (a square submatrix);
1988    each processor's off-diagonal portion encompasses the remainder of the
1989    local matrix (a rectangular submatrix).
1990 
1991    The user can specify preallocated storage for the diagonal part of
1992    the local submatrix with either d_nz or d_nnz (not both).  Set
1993    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1994    memory allocation.  Likewise, specify preallocated storage for the
1995    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1996 
1997    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1998    the figure below we depict these three local rows and all columns (0-11).
1999 
2000 .vb
2001            0 1 2 3 4 5 6 7 8 9 10 11
2002           -------------------
2003    row 3  |  o o o d d d o o o o o o
2004    row 4  |  o o o d d d o o o o o o
2005    row 5  |  o o o d d d o o o o o o
2006           -------------------
2007 .ve
2008 
2009    Thus, any entries in the d locations are stored in the d (diagonal)
2010    submatrix, and any entries in the o locations are stored in the
2011    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2012    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2013 
2014    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2015    plus the diagonal part of the d matrix,
2016    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2017    In general, for PDE problems in which most nonzeros are near the diagonal,
2018    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2019    or you will get TERRIBLE performance; see the users' manual chapter on
2020    matrices.
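
   For example, a sketch that creates a square matrix of global dimension M
   with block size 2, distributed across comm (error checking omitted):
.vb
   Mat A;
   MatCreateMPISBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,M,3,PETSC_NULL,1,PETSC_NULL,&A);
.ve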
2021 
2022    Level: intermediate
2023 
2024 .keywords: matrix, block, aij, compressed row, sparse, parallel
2025 
2026 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2027 @*/
2028 
2029 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2030 {
2031   PetscErrorCode ierr;
2032   int size;
2033 
2034   PetscFunctionBegin;
2035   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2036   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2037   if (size > 1) {
2038     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2039     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2040   } else {
2041     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2042     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2043   }
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 
2048 #undef __FUNCT__
2049 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2050 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2051 {
2052   Mat          mat;
2053   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2054   PetscErrorCode ierr;
2055   int len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs;
2056   PetscScalar  *array;
2057 
2058   PetscFunctionBegin;
2059   *newmat       = 0;
2060   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2061   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2062   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2063 
2064   mat->factor       = matin->factor;
2065   mat->preallocated = PETSC_TRUE;
2066   mat->assembled    = PETSC_TRUE;
2067   mat->insertmode   = NOT_SET_VALUES;
2068 
2069   a = (Mat_MPISBAIJ*)mat->data;
2070   a->bs  = oldmat->bs;
2071   a->bs2 = oldmat->bs2;
2072   a->mbs = oldmat->mbs;
2073   a->nbs = oldmat->nbs;
2074   a->Mbs = oldmat->Mbs;
2075   a->Nbs = oldmat->Nbs;
2076 
2077   a->rstart       = oldmat->rstart;
2078   a->rend         = oldmat->rend;
2079   a->cstart       = oldmat->cstart;
2080   a->cend         = oldmat->cend;
2081   a->size         = oldmat->size;
2082   a->rank         = oldmat->rank;
2083   a->donotstash   = oldmat->donotstash;
2084   a->roworiented  = oldmat->roworiented;
2085   a->rowindices   = 0;
2086   a->rowvalues    = 0;
2087   a->getrowactive = PETSC_FALSE;
2088   a->barray       = 0;
2089   a->rstart_bs    = oldmat->rstart_bs;
2090   a->rend_bs      = oldmat->rend_bs;
2091   a->cstart_bs    = oldmat->cstart_bs;
2092   a->cend_bs      = oldmat->cend_bs;
2093 
2094   /* hash table stuff */
2095   a->ht           = 0;
2096   a->hd           = 0;
2097   a->ht_size      = 0;
2098   a->ht_flag      = oldmat->ht_flag;
2099   a->ht_fact      = oldmat->ht_fact;
2100   a->ht_total_ct  = 0;
2101   a->ht_insert_ct = 0;
2102 
2103   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2104   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2105   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2106   if (oldmat->colmap) {
2107 #if defined (PETSC_USE_CTABLE)
2108     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2109 #else
2110     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2111     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2112     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2113 #endif
2114   } else a->colmap = 0;
2115 
2116   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2117     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2118     PetscLogObjectMemory(mat,len*sizeof(int));
2119     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2120   } else a->garray = 0;
2121 
2122   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2123   PetscLogObjectParent(mat,a->lvec);
2124   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2125   PetscLogObjectParent(mat,a->Mvctx);
2126 
2127   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2128   PetscLogObjectParent(mat,a->slvec0);
2129   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2130   PetscLogObjectParent(mat,a->slvec1);
2131 
2132   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2133   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2134   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2135   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2136   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2137   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2138   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2139   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2142   PetscLogObjectParent(mat,a->slvec0b);
2143   PetscLogObjectParent(mat,a->slvec1a);
2144   PetscLogObjectParent(mat,a->slvec1b);
2145 
2146   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2147   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2148   a->sMvctx = oldmat->sMvctx;
2149   PetscLogObjectParent(mat,a->sMvctx);
2150 
2151   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2152   PetscLogObjectParent(mat,a->A);
2153   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2154   PetscLogObjectParent(mat,a->B);
2155   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2156   *newmat = mat;
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 #include "petscsys.h"
2161 
2162 #undef __FUNCT__
2163 #define __FUNCT__ "MatLoad_MPISBAIJ"
2164 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2165 {
2166   Mat          A;
2167   PetscErrorCode ierr;
2168   int          i,nz,j,rstart,rend,fd;
2169   PetscScalar  *vals,*buf;
2170   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2171   MPI_Status   status;
2172   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2173   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2174   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2175   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2176   int          dcount,kmax,k,nzcount,tmp;
2177 
2178   PetscFunctionBegin;
2179   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2180 
2181   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2182   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2183   if (!rank) {
2184     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2185     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2186     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2187     if (header[3] < 0) {
2188       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2189     }
2190   }
2191 
2192   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2193   M = header[1]; N = header[2];
2194 
2195   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2196 
2197   /*
2198      This code adds extra rows to make sure the number of rows is
2199      divisible by the blocksize
2200   */
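  /* Illustrative example: M = 10 and bs = 4 give Mbs = 2 and
     extra_rows = 4 - 10 + 8 = 2; since extra_rows != bs, Mbs becomes 3 and the
     matrix is padded to 12 rows, the added rows receiving a unit diagonal. */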
2201   Mbs        = M/bs;
2202   extra_rows = bs - M + bs*(Mbs);
2203   if (extra_rows == bs) extra_rows = 0;
2204   else                  Mbs++;
2205   if (extra_rows && !rank) {
2206     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2207   }
2208 
2209   /* determine ownership of all rows */
2210   mbs        = Mbs/size + ((Mbs % size) > rank);
2211   m          = mbs*bs;
2212   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2213   browners   = rowners + size + 1;
2214   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2215   rowners[0] = 0;
2216   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2217   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2218   rstart = rowners[rank];
2219   rend   = rowners[rank+1];
2220 
2221   /* distribute row lengths to all processors */
2222   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2223   if (!rank) {
2224     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2225     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2226     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2227     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2228     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2229     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2230     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2231   } else {
2232     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2233   }
2234 
2235   if (!rank) {   /* procs[0] */
2236     /* calculate the number of nonzeros on each processor */
2237     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2238     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2239     for (i=0; i<size; i++) {
2240       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2241         procsnz[i] += rowlengths[j];
2242       }
2243     }
2244     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2245 
2246     /* determine max buffer needed and allocate it */
2247     maxnz = 0;
2248     for (i=0; i<size; i++) {
2249       maxnz = PetscMax(maxnz,procsnz[i]);
2250     }
2251     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2252 
2253     /* read in my part of the matrix column indices  */
2254     nz     = procsnz[0];
2255     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2256     mycols = ibuf;
2257     if (size == 1)  nz -= extra_rows;
2258     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2259     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2260 
2261     /* read in the column indices of all other processes (except the last one) and ship them off */
2262     for (i=1; i<size-1; i++) {
2263       nz   = procsnz[i];
2264       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2265       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2266     }
2267     /* read in the stuff for the last proc */
2268     if (size != 1) {
2269       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2270       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2271       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2272       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2273     }
2274     ierr = PetscFree(cols);CHKERRQ(ierr);
2275   } else {  /* procs[i], i>0 */
2276     /* determine buffer space needed for message */
2277     nz = 0;
2278     for (i=0; i<m; i++) {
2279       nz += locrowlens[i];
2280     }
2281     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2282     mycols = ibuf;
2283     /* receive message of column indices*/
2284     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2285     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2286     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2287   }
2288 
2289   /* loop over local rows, determining number of off diagonal entries */
2290   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2291   odlens   = dlens + (rend-rstart);
2292   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2293   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2294   masked1  = mask    + Mbs;
2295   masked2  = masked1 + Mbs;
2296   rowcount = 0; nzcount = 0;
2297   for (i=0; i<mbs; i++) {
2298     dcount  = 0;
2299     odcount = 0;
2300     for (j=0; j<bs; j++) {
2301       kmax = locrowlens[rowcount];
2302       for (k=0; k<kmax; k++) {
2303         tmp = mycols[nzcount++]/bs; /* block col. index */
2304         if (!mask[tmp]) {
2305           mask[tmp] = 1;
2306           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2307           else masked1[dcount++] = tmp; /* entry in diag portion */
2308         }
2309       }
2310       rowcount++;
2311     }
2312 
2313     dlens[i]  = dcount;  /* d_nzz[i] */
2314     odlens[i] = odcount; /* o_nzz[i] */
2315 
2316     /* zero out the mask elements we set */
2317     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2318     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2319   }
2320 
2321   /* create our matrix */
2322   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2323   ierr = MatSetType(A,type);CHKERRQ(ierr);
2324   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2325   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2326 
2327   if (!rank) {
2328     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2329     /* read in my part of the matrix numerical values  */
2330     nz = procsnz[0];
2331     vals = buf;
2332     mycols = ibuf;
2333     if (size == 1)  nz -= extra_rows;
2334     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2335     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2336 
2337     /* insert into matrix */
2338     jj      = rstart*bs;
2339     for (i=0; i<m; i++) {
2340       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2341       mycols += locrowlens[i];
2342       vals   += locrowlens[i];
2343       jj++;
2344     }
2345 
2346     /* read in other processors (except the last one) and ship out */
2347     for (i=1; i<size-1; i++) {
2348       nz   = procsnz[i];
2349       vals = buf;
2350       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2351       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2352     }
2353     /* the last proc */
2354     if (size != 1){
2355       nz   = procsnz[i] - extra_rows;
2356       vals = buf;
2357       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2358       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2359       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2360     }
2361     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2362 
2363   } else {
2364     /* receive numeric values */
2365     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2366 
2367     /* receive message of values*/
2368     vals   = buf;
2369     mycols = ibuf;
2370     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2371     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2372     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2373 
2374     /* insert into matrix */
2375     jj      = rstart*bs;
2376     for (i=0; i<m; i++) {
2377       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2378       mycols += locrowlens[i];
2379       vals   += locrowlens[i];
2380       jj++;
2381     }
2382   }
2383 
2384   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2385   ierr = PetscFree(buf);CHKERRQ(ierr);
2386   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2387   ierr = PetscFree(rowners);CHKERRQ(ierr);
2388   ierr = PetscFree(dlens);CHKERRQ(ierr);
2389   ierr = PetscFree(mask);CHKERRQ(ierr);
2390   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2391   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2392   *newmat = A;
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2398 /*@
2399    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2400 
2401    Input Parameters:
2402 .  mat  - the matrix
2403 .  fact - factor
2404 
2405    Collective on Mat
2406 
2407    Level: advanced
2408 
2409   Notes:
2410    This can also be set by the command line option: -mat_use_hash_table fact
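   For example, -mat_use_hash_table 1.5; values of fact <= 1.0 fall back to the
   default factor 1.39.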
2411 
2412 .keywords: matrix, hashtable, factor, HT
2413 
2414 .seealso: MatSetOption()
2415 @*/
2416 PetscErrorCode MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2417 {
2418   PetscFunctionBegin;
2419   SETERRQ(1,"Function not yet written for SBAIJ format");
2420   /* PetscFunctionReturn(0); */
2421 }
2422 
2423 #undef __FUNCT__
2424 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2425 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2426 {
2427   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2428   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2429   PetscReal    atmp;
2430   PetscReal    *work,*svalues,*rvalues;
2431   PetscErrorCode ierr;
2432   int i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2433   int          rank,size,*rowners_bs,dest,count,source;
2434   PetscScalar  *va;
2435   MatScalar    *ba;
2436   MPI_Status   stat;
2437 
2438   PetscFunctionBegin;
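  /*
     Sketch of the algorithm (derived from the code below): MatGetRowMax() on
     the diagonal block handles the locally stored part of each row.  Because
     only the upper triangle is stored, an entry of the off-diagonal block B in
     global column c also stands for the symmetric entry in row c, owned by a
     process of higher rank; those contributions are accumulated per global
     column in work[] and shipped to the owning processes, which fold them into
     their local row maxima.
  */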
2439   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2440   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2441 
2442   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2443   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2444 
2445   bs   = a->bs;
2446   mbs  = a->mbs;
2447   Mbs  = a->Mbs;
2448   ba   = b->a;
2449   bi   = b->i;
2450   bj   = b->j;
2455 
2456   /* find ownerships */
2457   rowners_bs = a->rowners_bs;
2463 
2464   /* each proc creates an array to be distributed */
2465   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2466   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2467 
2468   /* row_max for B */
2469   if (rank != size-1){
2470     for (i=0; i<mbs; i++) {
2471       ncols = bi[1] - bi[0]; bi++;
2472       brow  = bs*i;
2473       for (j=0; j<ncols; j++){
2474         bcol = bs*(*bj);
2475         for (kcol=0; kcol<bs; kcol++){
2476           col = bcol + kcol;                 /* local col index */
2477           col += rowners_bs[rank+1];      /* global col index */
2479           for (krow=0; krow<bs; krow++){
2480             atmp = PetscAbsScalar(*ba); ba++;
2481             row = brow + krow;    /* local row index */
2483             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2484             if (work[col] < atmp) work[col] = atmp;
2485           }
2486         }
2487         bj++;
2488       }
2489     }
2495 
2496     /* send values to its owners */
2497     for (dest=rank+1; dest<size; dest++){
2498       svalues = work + rowners_bs[dest];
2499       count   = rowners_bs[dest+1]-rowners_bs[dest];
2500       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2505     }
2506   }
2507 
2508   /* receive values */
2509   if (rank){
2510     rvalues = work;
2511     count   = rowners_bs[rank+1]-rowners_bs[rank];
2512     for (source=0; source<rank; source++){
2513       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2514       /* process values */
2515       for (i=0; i<count; i++){
2516         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2517       }
2522     }
2523   }
2524 
2525   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2526   ierr = PetscFree(work);CHKERRQ(ierr);
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 #undef __FUNCT__
2531 #define __FUNCT__ "MatRelax_MPISBAIJ"
2532 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2533 {
2534   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2535   PetscErrorCode ierr;
2536   int mbs=mat->mbs,bs=mat->bs;
2537   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2538   Vec            bb1;
2539 
2540   PetscFunctionBegin;
2541   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2542   if (bs > 1)
2543     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2544 
2545   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2546     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2547       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2548       its--;
2549     }
2550 
2551     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
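    /*
       Data flow of one sweep, sketched from the calls below: slvec0 is packed
       as [-xx; -B^T*xx] and scatter-added into slvec1, initialized to [bb; 0].
       After the scatter, slvec1a holds bb minus the lower-triangular
       (off-diagonal block) contributions and slvec1b holds the ghost values
       -xx, so the multadd forms bb1 = slvec1a + B*slvec1b, i.e. bb with both
       off-diagonal-block products removed; the local SSOR on A completes the
       sweep.
    */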
2552     while (its--){
2553 
2554       /* lower triangular part: slvec0b = B^T*xx (negated below together with the copy of xx) */
2555       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2556 
2557       /* copy xx into slvec0a */
2558       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2559       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2560       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2561       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2562 
2563       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2564 
2565       /* copy bb into slvec1a */
2566       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2567       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2568       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2569       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2570 
2571       /* set slvec1b = 0 */
2572       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2573 
2574       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2575       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2576       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2577       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2578 
2579       /* upper triangular part: bb1 = slvec1a + B*slvec1b */
2580       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2581 
2582       /* local diagonal sweep */
2583       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2584     }
2585     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2586   } else {
2587     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2588   }
2589   PetscFunctionReturn(0);
2590 }
2591 
2592 #undef __FUNCT__
2593 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2594 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2595 {
2596   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2597   PetscErrorCode ierr;
2598   PetscScalar    mone=-1.0;
2599   Vec            lvec1,bb1;
2600 
2601   PetscFunctionBegin;
2602   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2603   if (mat->bs > 1)
2604     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2605 
2606   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2607     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2608       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2609       its--;
2610     }
2611 
2612     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2613     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2614     while (its--){
2615       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2616 
2617       /* lower diagonal part: bb1 = bb - B^T*xx */
2618       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2619       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2620 
2621       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2622       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2623       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2624 
2625       /* upper diagonal part: bb1 = bb1 - B*x */
2626       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2627       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2628 
2629       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2630 
2631       /* diagonal sweep */
2632       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2633     }
2634     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2635     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2636   } else {
2637     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2638   }
2639   PetscFunctionReturn(0);
2640 }
2641 
2642