xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision dfbe8321245cdf9338262824a41580dace4e4143)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
3 #include "mpisbaij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
8 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,int,IS[],int);
10 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
11 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
13 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
16 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
26    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
27    converts the entries into single precision and then calls ..._MatScalar() to put them
28    into the single precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (int*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
153 #ifndef MatSetValues_SeqBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (int*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
237   PetscErrorCode ierr;
238   int i,N = m*n;
239   MatScalar    *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   int i,N = m*n*b->bs2;
263   MatScalar   *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
281 PetscErrorCode MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
282 {
283   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
284   PetscErrorCode ierr;
285   int i,N = m*n;
286   MatScalar   *vsingle;
287 
288   PetscFunctionBegin;
289   SETERRQ(1,"Function not yet written for SBAIJ format");
290   /* PetscFunctionReturn(0); */
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
295 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
296 {
297   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
298   PetscErrorCode ierr;
299   int i,N = m*n*b->bs2;
300   MatScalar   *vsingle;
301 
302   PetscFunctionBegin;
303   SETERRQ(1,"Function not yet written for SBAIJ format");
304   /* PetscFunctionReturn(0); */
305 }
306 #endif
307 
308 /* Only add/insert a(i,j) with i<=j (blocks).
309    Any a(i,j) with i>j input by user is ingored.
310 */
311 #undef __FUNCT__
312 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
313 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
314 {
315   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
316   MatScalar    value;
317   PetscTruth   roworiented = baij->roworiented;
318   PetscErrorCode ierr;
319   int i,j,row,col;
320   int          rstart_orig=baij->rstart_bs;
321   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
322   int          cend_orig=baij->cend_bs,bs=baij->bs;
323 
324   /* Some Variables required in the macro */
325   Mat          A = baij->A;
326   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
327   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
328   MatScalar    *aa=a->a;
329 
330   Mat          B = baij->B;
331   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
332   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
333   MatScalar    *ba=b->a;
334 
335   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
336   int          low,high,t,ridx,cidx,bs2=a->bs2;
337   MatScalar    *ap,*bap;
338 
339   /* for stash */
340   int          n_loc, *in_loc=0;
341   MatScalar    *v_loc=0;
342 
343   PetscFunctionBegin;
344 
345   if(!baij->donotstash){
346     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
347     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
348   }
349 
350   for (i=0; i<m; i++) {
351     if (im[i] < 0) continue;
352 #if defined(PETSC_USE_BOPT_g)
353     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
354 #endif
355     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
356       row = im[i] - rstart_orig;              /* local row index */
357       for (j=0; j<n; j++) {
358         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
359         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
360           col = in[j] - cstart_orig;          /* local col index */
361           brow = row/bs; bcol = col/bs;
362           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
363           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
364           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
365           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
366         } else if (in[j] < 0) continue;
367 #if defined(PETSC_USE_BOPT_g)
368         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
369 #endif
370         else {  /* off-diag entry (B) */
371           if (mat->was_assembled) {
372             if (!baij->colmap) {
373               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
374             }
375 #if defined (PETSC_USE_CTABLE)
376             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
377             col  = col - 1;
378 #else
379             col = baij->colmap[in[j]/bs] - 1;
380 #endif
381             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
382               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
383               col =  in[j];
384               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
385               B = baij->B;
386               b = (Mat_SeqBAIJ*)(B)->data;
387               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
388               ba=b->a;
389             } else col += in[j]%bs;
390           } else col = in[j];
391           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
392           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
393           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
394         }
395       }
396     } else {  /* off processor entry */
397       if (!baij->donotstash) {
398         n_loc = 0;
399         for (j=0; j<n; j++){
400           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
401           in_loc[n_loc] = in[j];
402           if (roworiented) {
403             v_loc[n_loc] = v[i*n+j];
404           } else {
405             v_loc[n_loc] = v[j*m+i];
406           }
407           n_loc++;
408         }
409         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
410       }
411     }
412   }
413 
414   if(!baij->donotstash){
415     ierr = PetscFree(in_loc);CHKERRQ(ierr);
416     ierr = PetscFree(v_loc);CHKERRQ(ierr);
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
423 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
424 {
425   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
426   const MatScalar *value;
427   MatScalar       *barray=baij->barray;
428   PetscTruth      roworiented = baij->roworiented;
429   PetscErrorCode ierr;
430   int i,j,ii,jj,row,col,rstart=baij->rstart;
431   int             rend=baij->rend,cstart=baij->cstart,stepval;
432   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
433 
434   PetscFunctionBegin;
435   if(!barray) {
436     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
437     baij->barray = barray;
438   }
439 
440   if (roworiented) {
441     stepval = (n-1)*bs;
442   } else {
443     stepval = (m-1)*bs;
444   }
445   for (i=0; i<m; i++) {
446     if (im[i] < 0) continue;
447 #if defined(PETSC_USE_BOPT_g)
448     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
449 #endif
450     if (im[i] >= rstart && im[i] < rend) {
451       row = im[i] - rstart;
452       for (j=0; j<n; j++) {
453         /* If NumCol = 1 then a copy is not required */
454         if ((roworiented) && (n == 1)) {
455           barray = (MatScalar*) v + i*bs2;
456         } else if((!roworiented) && (m == 1)) {
457           barray = (MatScalar*) v + j*bs2;
458         } else { /* Here a copy is required */
459           if (roworiented) {
460             value = v + i*(stepval+bs)*bs + j*bs;
461           } else {
462             value = v + j*(stepval+bs)*bs + i*bs;
463           }
464           for (ii=0; ii<bs; ii++,value+=stepval) {
465             for (jj=0; jj<bs; jj++) {
466               *barray++  = *value++;
467             }
468           }
469           barray -=bs2;
470         }
471 
472         if (in[j] >= cstart && in[j] < cend){
473           col  = in[j] - cstart;
474           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
475         }
476         else if (in[j] < 0) continue;
477 #if defined(PETSC_USE_BOPT_g)
478         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
479 #endif
480         else {
481           if (mat->was_assembled) {
482             if (!baij->colmap) {
483               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
484             }
485 
486 #if defined(PETSC_USE_BOPT_g)
487 #if defined (PETSC_USE_CTABLE)
488             { int data;
489               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
490               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
491             }
492 #else
493             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
494 #endif
495 #endif
496 #if defined (PETSC_USE_CTABLE)
497 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
498             col  = (col - 1)/bs;
499 #else
500             col = (baij->colmap[in[j]] - 1)/bs;
501 #endif
502             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
503               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
504               col =  in[j];
505             }
506           }
507           else col = in[j];
508           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
509         }
510       }
511     } else {
512       if (!baij->donotstash) {
513         if (roworiented) {
514           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
515         } else {
516           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
517         }
518       }
519     }
520   }
521   PetscFunctionReturn(0);
522 }
523 
524 #define HASH_KEY 0.6180339887
525 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
526 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
527 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
528 #undef __FUNCT__
529 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
530 PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
531 {
532   PetscFunctionBegin;
533   SETERRQ(1,"Function not yet written for SBAIJ format");
534   /* PetscFunctionReturn(0); */
535 }
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
539 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
540 {
541   PetscFunctionBegin;
542   SETERRQ(1,"Function not yet written for SBAIJ format");
543   /* PetscFunctionReturn(0); */
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_MPISBAIJ"
548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
549 {
550   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
551   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
552   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
553 
554   PetscFunctionBegin;
555   for (i=0; i<m; i++) {
556     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
557     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
558     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
559       row = idxm[i] - bsrstart;
560       for (j=0; j<n; j++) {
561         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
562         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
563         if (idxn[j] >= bscstart && idxn[j] < bscend){
564           col = idxn[j] - bscstart;
565           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
566         } else {
567           if (!baij->colmap) {
568             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
569           }
570 #if defined (PETSC_USE_CTABLE)
571           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
572           data --;
573 #else
574           data = baij->colmap[idxn[j]/bs]-1;
575 #endif
576           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
577           else {
578             col  = data + idxn[j]%bs;
579             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
580           }
581         }
582       }
583     } else {
584       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
585     }
586   }
587  PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatNorm_MPISBAIJ"
592 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
593 {
594   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
595   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
596   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
597   PetscErrorCode ierr;
598   PetscReal  sum[2],*lnorm2;
599 
600   PetscFunctionBegin;
601   if (baij->size == 1) {
602     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
603   } else {
604     if (type == NORM_FROBENIUS) {
605       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
606       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
607       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
608       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
609       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
610       /*
611       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
612       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
613       */
614       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
615       /*
616       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
617       PetscSynchronizedFlush(mat->comm); */
618 
619       *norm = sqrt(sum[0] + 2*sum[1]);
620       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
621     } else {
622       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 /*
629   Creates the hash table, and sets the table
630   This table is created only once.
631   If new entried need to be added to the matrix
632   then the hash table has to be destroyed and
633   recreated.
634 */
635 #undef __FUNCT__
636 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
637 PetscErrorCode MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
638 {
639   PetscFunctionBegin;
640   SETERRQ(1,"Function not yet written for SBAIJ format");
641   /* PetscFunctionReturn(0); */
642 }
643 
644 #undef __FUNCT__
645 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
646 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
647 {
648   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
649   PetscErrorCode ierr;
650   int nstash,reallocs;
651   InsertMode  addv;
652 
653   PetscFunctionBegin;
654   if (baij->donotstash) {
655     PetscFunctionReturn(0);
656   }
657 
658   /* make sure all processors are either in INSERTMODE or ADDMODE */
659   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
660   if (addv == (ADD_VALUES|INSERT_VALUES)) {
661     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
662   }
663   mat->insertmode = addv; /* in case this processor had no cache */
664 
665   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
666   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
667   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
668   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
669   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
670   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
671   PetscFunctionReturn(0);
672 }
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
676 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
677 {
678   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
679   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
680   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
681   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
682   int         *row,*col,other_disassembled;
683   PetscTruth  r1,r2,r3;
684   MatScalar   *val;
685   InsertMode  addv = mat->insertmode;
686 
687   PetscFunctionBegin;
688 
689   if (!baij->donotstash) {
690     while (1) {
691       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
692       /*
693       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
694       PetscSynchronizedFlush(mat->comm);
695       */
696       if (!flg) break;
697 
698       for (i=0; i<n;) {
699         /* Now identify the consecutive vals belonging to the same row */
700         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
701         if (j < n) ncols = j-i;
702         else       ncols = n-i;
703         /* Now assemble all these values with a single function call */
704         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
705         i = j;
706       }
707     }
708     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
709     /* Now process the block-stash. Since the values are stashed column-oriented,
710        set the roworiented flag to column oriented, and after MatSetValues()
711        restore the original flags */
712     r1 = baij->roworiented;
713     r2 = a->roworiented;
714     r3 = b->roworiented;
715     baij->roworiented = PETSC_FALSE;
716     a->roworiented    = PETSC_FALSE;
717     b->roworiented    = PETSC_FALSE;
718     while (1) {
719       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
720       if (!flg) break;
721 
722       for (i=0; i<n;) {
723         /* Now identify the consecutive vals belonging to the same row */
724         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
725         if (j < n) ncols = j-i;
726         else       ncols = n-i;
727         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
728         i = j;
729       }
730     }
731     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
732     baij->roworiented = r1;
733     a->roworiented    = r2;
734     b->roworiented    = r3;
735   }
736 
737   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
738   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
739 
740   /* determine if any processor has disassembled, if so we must
741      also disassemble ourselfs, in order that we may reassemble. */
742   /*
743      if nonzero structure of submatrix B cannot change then we know that
744      no processor disassembled thus we can skip this stuff
745   */
746   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
747     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
748     if (mat->was_assembled && !other_disassembled) {
749       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
750     }
751   }
752 
753   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
754     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
755   }
756   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
757   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
758 
759 #if defined(PETSC_USE_BOPT_g)
760   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
761     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
762     baij->ht_total_ct  = 0;
763     baij->ht_insert_ct = 0;
764   }
765 #endif
766   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
767     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
768     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
769     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
770   }
771 
772   if (baij->rowvalues) {
773     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
774     baij->rowvalues = 0;
775   }
776 
777   PetscFunctionReturn(0);
778 }
779 
780 #undef __FUNCT__
781 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
782 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
783 {
784   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
785   PetscErrorCode ierr;
786   int bs = baij->bs,size = baij->size,rank = baij->rank;
787   PetscTruth        iascii,isdraw;
788   PetscViewer       sviewer;
789   PetscViewerFormat format;
790 
791   PetscFunctionBegin;
792   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
793   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
794   if (iascii) {
795     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
796     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
797       MatInfo info;
798       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
799       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
800       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
801               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
802               baij->bs,(int)info.memory);CHKERRQ(ierr);
803       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
804       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
805       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
806       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
807       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
808       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
809       PetscFunctionReturn(0);
810     } else if (format == PETSC_VIEWER_ASCII_INFO) {
811       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
812       PetscFunctionReturn(0);
813     }
814   }
815 
816   if (isdraw) {
817     PetscDraw       draw;
818     PetscTruth isnull;
819     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
820     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
821   }
822 
823   if (size == 1) {
824     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
825     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
826   } else {
827     /* assemble the entire matrix onto first processor. */
828     Mat         A;
829     Mat_SeqSBAIJ *Aloc;
830     Mat_SeqBAIJ *Bloc;
831     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
832     MatScalar   *a;
833 
834     /* Should this be the same type as mat? */
835     if (!rank) {
836       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
837     } else {
838       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
839     }
840     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
841     ierr = MatMPISBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
842     PetscLogObjectParent(mat,A);
843 
844     /* copy over the A part */
845     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
846     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
847     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
848 
849     for (i=0; i<mbs; i++) {
850       rvals[0] = bs*(baij->rstart + i);
851       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
852       for (j=ai[i]; j<ai[i+1]; j++) {
853         col = (baij->cstart+aj[j])*bs;
854         for (k=0; k<bs; k++) {
855           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
856           col++; a += bs;
857         }
858       }
859     }
860     /* copy over the B part */
861     Bloc = (Mat_SeqBAIJ*)baij->B->data;
862     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
863     for (i=0; i<mbs; i++) {
864       rvals[0] = bs*(baij->rstart + i);
865       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
866       for (j=ai[i]; j<ai[i+1]; j++) {
867         col = baij->garray[aj[j]]*bs;
868         for (k=0; k<bs; k++) {
869           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
870           col++; a += bs;
871         }
872       }
873     }
874     ierr = PetscFree(rvals);CHKERRQ(ierr);
875     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
876     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
877     /*
878        Everyone has to call to draw the matrix since the graphics waits are
879        synchronized across all processors that share the PetscDraw object
880     */
881     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
882     if (!rank) {
883       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
884       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
885     }
886     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
887     ierr = MatDestroy(A);CHKERRQ(ierr);
888   }
889   PetscFunctionReturn(0);
890 }
891 
892 #undef __FUNCT__
893 #define __FUNCT__ "MatView_MPISBAIJ"
894 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
895 {
896   PetscErrorCode ierr;
897   PetscTruth iascii,isdraw,issocket,isbinary;
898 
899   PetscFunctionBegin;
900   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
901   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
902   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
903   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
904   if (iascii || isdraw || issocket || isbinary) {
905     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
906   } else {
907     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
908   }
909   PetscFunctionReturn(0);
910 }
911 
912 #undef __FUNCT__
913 #define __FUNCT__ "MatDestroy_MPISBAIJ"
914 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
915 {
916   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
917   PetscErrorCode ierr;
918 
919   PetscFunctionBegin;
920 #if defined(PETSC_USE_LOG)
921   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
922 #endif
923   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
924   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
925   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
926   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
927   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
928 #if defined (PETSC_USE_CTABLE)
929   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
930 #else
931   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
932 #endif
933   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
934   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
935   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
936   if (baij->slvec0) {
937     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
938     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
939   }
940   if (baij->slvec1) {
941     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
942     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
943     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
944   }
945   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
946   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
947   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
948   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
949 #if defined(PETSC_USE_MAT_SINGLE)
950   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
951 #endif
952   ierr = PetscFree(baij);CHKERRQ(ierr);
953   PetscFunctionReturn(0);
954 }
955 
956 #undef __FUNCT__
957 #define __FUNCT__ "MatMult_MPISBAIJ"
958 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
959 {
960   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
961   PetscErrorCode ierr;
962   int nt,mbs=a->mbs,bs=a->bs;
963   PetscScalar *x,*from,zero=0.0;
964 
965   PetscFunctionBegin;
966   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
967   if (nt != A->n) {
968     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
969   }
970   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
971   if (nt != A->m) {
972     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
973   }
974 
975   /* diagonal part */
976   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
977   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
978 
979   /* subdiagonal part */
980   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
981 
982   /* copy x into the vec slvec0 */
983   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
984   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
985   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
986   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
987 
988   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
989   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
990   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
991 
992   /* supperdiagonal part */
993   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
994 
995   PetscFunctionReturn(0);
996 }
997 
998 #undef __FUNCT__
999 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1000 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1001 {
1002   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1003   PetscErrorCode ierr;
1004   int nt;
1005 
1006   PetscFunctionBegin;
1007   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1008   if (nt != A->n) {
1009     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1010   }
1011   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1012   if (nt != A->m) {
1013     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1014   }
1015 
1016   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1017   /* do diagonal part */
1018   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1019   /* do supperdiagonal part */
1020   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1021   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1022   /* do subdiagonal part */
1023   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1024   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1025   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1026 
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1032 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1033 {
1034   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1035   PetscErrorCode ierr;
1036   int mbs=a->mbs,bs=a->bs;
1037   PetscScalar  *x,*from,zero=0.0;
1038 
1039   PetscFunctionBegin;
1040   /*
1041   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1042   PetscSynchronizedFlush(A->comm);
1043   */
1044   /* diagonal part */
1045   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1046   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1047 
1048   /* subdiagonal part */
1049   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1050 
1051   /* copy x into the vec slvec0 */
1052   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1053   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1054   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1055   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1056 
1057   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1058   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1059   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1060 
1061   /* supperdiagonal part */
1062   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1063 
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 #undef __FUNCT__
1068 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1069 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1070 {
1071   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1072   PetscErrorCode ierr;
1073 
1074   PetscFunctionBegin;
1075   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1076   /* do diagonal part */
1077   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1078   /* do supperdiagonal part */
1079   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1080   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1081 
1082   /* do subdiagonal part */
1083   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1084   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1085   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1086 
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1092 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1093 {
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1103 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1104 {
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 /*
1113   This only works correctly for square matrices where the subblock A->A is the
1114    diagonal block
1115 */
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1118 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1119 {
1120   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1121   PetscErrorCode ierr;
1122 
1123   PetscFunctionBegin;
1124   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1125   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatScale_MPISBAIJ"
1131 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1132 {
1133   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1138   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1144 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1145 {
1146   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1147   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1148   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1149   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1150   int            *cmap,*idx_p,cstart = mat->cstart;
1151 
1152   PetscFunctionBegin;
1153   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1154   mat->getrowactive = PETSC_TRUE;
1155 
1156   if (!mat->rowvalues && (idx || v)) {
1157     /*
1158         allocate enough space to hold information from the longest row.
1159     */
1160     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1161     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1162     int     max = 1,mbs = mat->mbs,tmp;
1163     for (i=0; i<mbs; i++) {
1164       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1165       if (max < tmp) { max = tmp; }
1166     }
1167     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1168     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1169   }
1170 
1171   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1172   lrow = row - brstart;  /* local row index */
1173 
1174   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1175   if (!v)   {pvA = 0; pvB = 0;}
1176   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1177   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1178   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1179   nztot = nzA + nzB;
1180 
1181   cmap  = mat->garray;
1182   if (v  || idx) {
1183     if (nztot) {
1184       /* Sort by increasing column numbers, assuming A and B already sorted */
1185       int imark = -1;
1186       if (v) {
1187         *v = v_p = mat->rowvalues;
1188         for (i=0; i<nzB; i++) {
1189           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1190           else break;
1191         }
1192         imark = i;
1193         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1194         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1195       }
1196       if (idx) {
1197         *idx = idx_p = mat->rowindices;
1198         if (imark > -1) {
1199           for (i=0; i<imark; i++) {
1200             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1201           }
1202         } else {
1203           for (i=0; i<nzB; i++) {
1204             if (cmap[cworkB[i]/bs] < cstart)
1205               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1206             else break;
1207           }
1208           imark = i;
1209         }
1210         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1211         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1212       }
1213     } else {
1214       if (idx) *idx = 0;
1215       if (v)   *v   = 0;
1216     }
1217   }
1218   *nz = nztot;
1219   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1220   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1221   PetscFunctionReturn(0);
1222 }
1223 
1224 #undef __FUNCT__
1225 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1226 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1227 {
1228   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1229 
1230   PetscFunctionBegin;
1231   if (baij->getrowactive == PETSC_FALSE) {
1232     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1233   }
1234   baij->getrowactive = PETSC_FALSE;
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1240 PetscErrorCode MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1241 {
1242   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1243 
1244   PetscFunctionBegin;
1245   *bs = baij->bs;
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1251 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1252 {
1253   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1258   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1264 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1265 {
1266   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1267   Mat         A = a->A,B = a->B;
1268   PetscErrorCode ierr;
1269   PetscReal   isend[5],irecv[5];
1270 
1271   PetscFunctionBegin;
1272   info->block_size     = (PetscReal)a->bs;
1273   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1274   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1275   isend[3] = info->memory;  isend[4] = info->mallocs;
1276   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1277   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1278   isend[3] += info->memory;  isend[4] += info->mallocs;
1279   if (flag == MAT_LOCAL) {
1280     info->nz_used      = isend[0];
1281     info->nz_allocated = isend[1];
1282     info->nz_unneeded  = isend[2];
1283     info->memory       = isend[3];
1284     info->mallocs      = isend[4];
1285   } else if (flag == MAT_GLOBAL_MAX) {
1286     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1287     info->nz_used      = irecv[0];
1288     info->nz_allocated = irecv[1];
1289     info->nz_unneeded  = irecv[2];
1290     info->memory       = irecv[3];
1291     info->mallocs      = irecv[4];
1292   } else if (flag == MAT_GLOBAL_SUM) {
1293     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1294     info->nz_used      = irecv[0];
1295     info->nz_allocated = irecv[1];
1296     info->nz_unneeded  = irecv[2];
1297     info->memory       = irecv[3];
1298     info->mallocs      = irecv[4];
1299   } else {
1300     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1301   }
1302   info->rows_global       = (PetscReal)A->M;
1303   info->columns_global    = (PetscReal)A->N;
1304   info->rows_local        = (PetscReal)A->m;
1305   info->columns_local     = (PetscReal)A->N;
1306   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1307   info->fill_ratio_needed = 0;
1308   info->factor_mallocs    = 0;
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1314 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1315 {
1316   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1317   PetscErrorCode ierr;
1318 
1319   PetscFunctionBegin;
1320   switch (op) {
1321   case MAT_NO_NEW_NONZERO_LOCATIONS:
1322   case MAT_YES_NEW_NONZERO_LOCATIONS:
1323   case MAT_COLUMNS_UNSORTED:
1324   case MAT_COLUMNS_SORTED:
1325   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1326   case MAT_KEEP_ZEROED_ROWS:
1327   case MAT_NEW_NONZERO_LOCATION_ERR:
1328     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1329     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1330     break;
1331   case MAT_ROW_ORIENTED:
1332     a->roworiented = PETSC_TRUE;
1333     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1334     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1335     break;
1336   case MAT_ROWS_SORTED:
1337   case MAT_ROWS_UNSORTED:
1338   case MAT_YES_NEW_DIAGONALS:
1339     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1340     break;
1341   case MAT_COLUMN_ORIENTED:
1342     a->roworiented = PETSC_FALSE;
1343     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1344     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1345     break;
1346   case MAT_IGNORE_OFF_PROC_ENTRIES:
1347     a->donotstash = PETSC_TRUE;
1348     break;
1349   case MAT_NO_NEW_DIAGONALS:
1350     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1351   case MAT_USE_HASH_TABLE:
1352     a->ht_flag = PETSC_TRUE;
1353     break;
1354   case MAT_NOT_SYMMETRIC:
1355   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1356   case MAT_HERMITIAN:
1357     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1358   case MAT_SYMMETRIC:
1359   case MAT_STRUCTURALLY_SYMMETRIC:
1360   case MAT_NOT_HERMITIAN:
1361   case MAT_SYMMETRY_ETERNAL:
1362   case MAT_NOT_SYMMETRY_ETERNAL:
1363     break;
1364   default:
1365     SETERRQ(PETSC_ERR_SUP,"unknown option");
1366   }
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1372 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1373 {
1374   PetscErrorCode ierr;
1375   PetscFunctionBegin;
1376   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1382 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1383 {
1384   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1385   Mat         a = baij->A,b = baij->B;
1386   PetscErrorCode ierr;
1387   int s1,s2,s3;
1388 
1389   PetscFunctionBegin;
1390   if (ll != rr) {
1391     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1392   }
1393   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1394   if (rr) {
1395     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1396     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1397     /* Overlap communication with computation. */
1398     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1399     /*} if (ll) { */
1400     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1401     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1402     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1403     /* } */
1404   /* scale  the diagonal block */
1405   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1406 
1407   /* if (rr) { */
1408     /* Do a scatter end and then right scale the off-diagonal block */
1409     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1410     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1411   }
1412 
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1418 PetscErrorCode MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1419 {
1420   PetscFunctionBegin;
1421   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1426 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1427 {
1428   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1429   MPI_Comm    comm = A->comm;
1430   static int  called = 0;
1431   PetscErrorCode ierr;
1432 
1433   PetscFunctionBegin;
1434   if (!a->rank) {
1435     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1436   }
1437   if (called) {PetscFunctionReturn(0);} else called = 1;
1438   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1439   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1440   PetscFunctionReturn(0);
1441 }
1442 
1443 #undef __FUNCT__
1444 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1445 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1446 {
1447   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1448   PetscErrorCode ierr;
1449 
1450   PetscFunctionBegin;
1451   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatEqual_MPISBAIJ"
1459 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1460 {
1461   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1462   Mat         a,b,c,d;
1463   PetscTruth  flg;
1464   PetscErrorCode ierr;
1465 
1466   PetscFunctionBegin;
1467   a = matA->A; b = matA->B;
1468   c = matB->A; d = matB->B;
1469 
1470   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1471   if (flg == PETSC_TRUE) {
1472     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1473   }
1474   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1480 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1481 {
1482   PetscErrorCode ierr;
1483 
1484   PetscFunctionBegin;
1485   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 #undef __FUNCT__
1490 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1491 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1492 {
1493   int        i,ierr;
1494   PetscTruth flg;
1495 
1496   for (i=0; i<n; i++) {
1497     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1498     if (!flg) {
1499       SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices");
1500     }
1501   }
1502   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 
1507 /* -------------------------------------------------------------------*/
1508 static struct _MatOps MatOps_Values = {
1509        MatSetValues_MPISBAIJ,
1510        MatGetRow_MPISBAIJ,
1511        MatRestoreRow_MPISBAIJ,
1512        MatMult_MPISBAIJ,
1513 /* 4*/ MatMultAdd_MPISBAIJ,
1514        MatMultTranspose_MPISBAIJ,
1515        MatMultTransposeAdd_MPISBAIJ,
1516        0,
1517        0,
1518        0,
1519 /*10*/ 0,
1520        0,
1521        0,
1522        MatRelax_MPISBAIJ,
1523        MatTranspose_MPISBAIJ,
1524 /*15*/ MatGetInfo_MPISBAIJ,
1525        MatEqual_MPISBAIJ,
1526        MatGetDiagonal_MPISBAIJ,
1527        MatDiagonalScale_MPISBAIJ,
1528        MatNorm_MPISBAIJ,
1529 /*20*/ MatAssemblyBegin_MPISBAIJ,
1530        MatAssemblyEnd_MPISBAIJ,
1531        0,
1532        MatSetOption_MPISBAIJ,
1533        MatZeroEntries_MPISBAIJ,
1534 /*25*/ MatZeroRows_MPISBAIJ,
1535        0,
1536        0,
1537        0,
1538        0,
1539 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1540        0,
1541        0,
1542        0,
1543        0,
1544 /*35*/ MatDuplicate_MPISBAIJ,
1545        0,
1546        0,
1547        0,
1548        0,
1549 /*40*/ 0,
1550        MatGetSubMatrices_MPISBAIJ,
1551        MatIncreaseOverlap_MPISBAIJ,
1552        MatGetValues_MPISBAIJ,
1553        0,
1554 /*45*/ MatPrintHelp_MPISBAIJ,
1555        MatScale_MPISBAIJ,
1556        0,
1557        0,
1558        0,
1559 /*50*/ MatGetBlockSize_MPISBAIJ,
1560        0,
1561        0,
1562        0,
1563        0,
1564 /*55*/ 0,
1565        0,
1566        MatSetUnfactored_MPISBAIJ,
1567        0,
1568        MatSetValuesBlocked_MPISBAIJ,
1569 /*60*/ 0,
1570        0,
1571        0,
1572        MatGetPetscMaps_Petsc,
1573        0,
1574 /*65*/ 0,
1575        0,
1576        0,
1577        0,
1578        0,
1579 /*70*/ MatGetRowMax_MPISBAIJ,
1580        0,
1581        0,
1582        0,
1583        0,
1584 /*75*/ 0,
1585        0,
1586        0,
1587        0,
1588        0,
1589 /*80*/ 0,
1590        0,
1591        0,
1592        0,
1593 /*85*/ MatLoad_MPISBAIJ
1594 };
1595 
1596 
1597 EXTERN_C_BEGIN
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1600 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1601 {
1602   PetscFunctionBegin;
1603   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1604   *iscopy = PETSC_FALSE;
1605   PetscFunctionReturn(0);
1606 }
1607 EXTERN_C_END
1608 
1609 EXTERN_C_BEGIN
1610 #undef __FUNCT__
1611 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1612 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1613 {
1614   Mat_MPISBAIJ *b;
1615   PetscErrorCode ierr;
1616   int i,mbs,Mbs;
1617 
1618   PetscFunctionBegin;
1619   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1620 
1621   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1622   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1623   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1624   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1625   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1626   if (d_nnz) {
1627     for (i=0; i<B->m/bs; i++) {
1628       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1629     }
1630   }
1631   if (o_nnz) {
1632     for (i=0; i<B->m/bs; i++) {
1633       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1634     }
1635   }
1636   B->preallocated = PETSC_TRUE;
1637   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1638   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1639   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1640   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1641 
1642   b   = (Mat_MPISBAIJ*)B->data;
1643   mbs = B->m/bs;
1644   Mbs = B->M/bs;
1645   if (mbs*bs != B->m) {
1646     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
1647   }
1648 
1649   b->bs  = bs;
1650   b->bs2 = bs*bs;
1651   b->mbs = mbs;
1652   b->nbs = mbs;
1653   b->Mbs = Mbs;
1654   b->Nbs = Mbs;
1655 
1656   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1657   b->rowners[0]    = 0;
1658   for (i=2; i<=b->size; i++) {
1659     b->rowners[i] += b->rowners[i-1];
1660   }
1661   b->rstart    = b->rowners[b->rank];
1662   b->rend      = b->rowners[b->rank+1];
1663   b->cstart    = b->rstart;
1664   b->cend      = b->rend;
1665   for (i=0; i<=b->size; i++) {
1666     b->rowners_bs[i] = b->rowners[i]*bs;
1667   }
1668   b->rstart_bs = b-> rstart*bs;
1669   b->rend_bs   = b->rend*bs;
1670 
1671   b->cstart_bs = b->cstart*bs;
1672   b->cend_bs   = b->cend*bs;
1673 
1674   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1675   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1676   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1677   PetscLogObjectParent(B,b->A);
1678 
1679   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1680   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1681   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1682   PetscLogObjectParent(B,b->B);
1683 
1684   /* build cache for off array entries formed */
1685   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1686 
1687   PetscFunctionReturn(0);
1688 }
1689 EXTERN_C_END
1690 
1691 /*MC
1692    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1693    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1694 
1695    Options Database Keys:
1696 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1697 
1698   Level: beginner
1699 
1700 .seealso: MatCreateMPISBAIJ
1701 M*/
1702 
1703 EXTERN_C_BEGIN
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatCreate_MPISBAIJ"
1706 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1707 {
1708   Mat_MPISBAIJ *b;
1709   PetscErrorCode ierr;
1710   PetscTruth   flg;
1711 
1712   PetscFunctionBegin;
1713 
1714   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1715   B->data = (void*)b;
1716   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1717   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1718 
1719   B->ops->destroy    = MatDestroy_MPISBAIJ;
1720   B->ops->view       = MatView_MPISBAIJ;
1721   B->mapping    = 0;
1722   B->factor     = 0;
1723   B->assembled  = PETSC_FALSE;
1724 
1725   B->insertmode = NOT_SET_VALUES;
1726   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1727   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1728 
1729   /* build local table of row and column ownerships */
1730   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1731   b->cowners    = b->rowners + b->size + 2;
1732   b->rowners_bs = b->cowners + b->size + 2;
1733   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1734 
1735   /* build cache for off array entries formed */
1736   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1737   b->donotstash  = PETSC_FALSE;
1738   b->colmap      = PETSC_NULL;
1739   b->garray      = PETSC_NULL;
1740   b->roworiented = PETSC_TRUE;
1741 
1742 #if defined(PETSC_USE_MAT_SINGLE)
1743   /* stuff for MatSetValues_XXX in single precision */
1744   b->setvalueslen     = 0;
1745   b->setvaluescopy    = PETSC_NULL;
1746 #endif
1747 
1748   /* stuff used in block assembly */
1749   b->barray       = 0;
1750 
1751   /* stuff used for matrix vector multiply */
1752   b->lvec         = 0;
1753   b->Mvctx        = 0;
1754   b->slvec0       = 0;
1755   b->slvec0b      = 0;
1756   b->slvec1       = 0;
1757   b->slvec1a      = 0;
1758   b->slvec1b      = 0;
1759   b->sMvctx       = 0;
1760 
1761   /* stuff for MatGetRow() */
1762   b->rowindices   = 0;
1763   b->rowvalues    = 0;
1764   b->getrowactive = PETSC_FALSE;
1765 
1766   /* hash table stuff */
1767   b->ht           = 0;
1768   b->hd           = 0;
1769   b->ht_size      = 0;
1770   b->ht_flag      = PETSC_FALSE;
1771   b->ht_fact      = 0;
1772   b->ht_total_ct  = 0;
1773   b->ht_insert_ct = 0;
1774 
1775   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1776   if (flg) {
1777     PetscReal fact = 1.39;
1778     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1779     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1780     if (fact <= 1.0) fact = 1.39;
1781     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1782     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1783   }
1784   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1785                                      "MatStoreValues_MPISBAIJ",
1786                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1787   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1788                                      "MatRetrieveValues_MPISBAIJ",
1789                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1790   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1791                                      "MatGetDiagonalBlock_MPISBAIJ",
1792                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1793   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1794                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1795                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1796   B->symmetric                  = PETSC_TRUE;
1797   B->structurally_symmetric     = PETSC_TRUE;
1798   B->symmetric_set              = PETSC_TRUE;
1799   B->structurally_symmetric_set = PETSC_TRUE;
1800   PetscFunctionReturn(0);
1801 }
1802 EXTERN_C_END
1803 
1804 /*MC
1805    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1806 
1807    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1808    and MATMPISBAIJ otherwise.
1809 
1810    Options Database Keys:
1811 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1812 
1813   Level: beginner
1814 
1815 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1816 M*/
1817 
1818 EXTERN_C_BEGIN
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatCreate_SBAIJ"
1821 PetscErrorCode MatCreate_SBAIJ(Mat A)
1822 {
1823   PetscErrorCode ierr,size;
1824 
1825   PetscFunctionBegin;
1826   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1827   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1828   if (size == 1) {
1829     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1830   } else {
1831     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1832   }
1833   PetscFunctionReturn(0);
1834 }
1835 EXTERN_C_END
1836 
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1839 /*@C
1840    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1841    the user should preallocate the matrix storage by setting the parameters
1842    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1843    performance can be increased by more than a factor of 50.
1844 
1845    Collective on Mat
1846 
1847    Input Parameters:
1848 +  A - the matrix
1849 .  bs   - size of blockk
1850 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1851            submatrix  (same for all local rows)
1852 .  d_nnz - array containing the number of block nonzeros in the various block rows
1853            in the upper triangular and diagonal part of the in diagonal portion of the local
1854            (possibly different for each block row) or PETSC_NULL.  You must leave room
1855            for the diagonal entry even if it is zero.
1856 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1857            submatrix (same for all local rows).
1858 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1859            off-diagonal portion of the local submatrix (possibly different for
1860            each block row) or PETSC_NULL.
1861 
1862 
1863    Options Database Keys:
1864 .   -mat_no_unroll - uses code that does not unroll the loops in the
1865                      block calculations (much slower)
1866 .   -mat_block_size - size of the blocks to use
1867 
1868    Notes:
1869 
1870    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1871    than it must be used on all processors that share the object for that argument.
1872 
1873    Storage Information:
1874    For a square global matrix we define each processor's diagonal portion
1875    to be its local rows and the corresponding columns (a square submatrix);
1876    each processor's off-diagonal portion encompasses the remainder of the
1877    local matrix (a rectangular submatrix).
1878 
1879    The user can specify preallocated storage for the diagonal part of
1880    the local submatrix with either d_nz or d_nnz (not both).  Set
1881    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1882    memory allocation.  Likewise, specify preallocated storage for the
1883    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1884 
1885    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1886    the figure below we depict these three local rows and all columns (0-11).
1887 
1888 .vb
1889            0 1 2 3 4 5 6 7 8 9 10 11
1890           -------------------
1891    row 3  |  o o o d d d o o o o o o
1892    row 4  |  o o o d d d o o o o o o
1893    row 5  |  o o o d d d o o o o o o
1894           -------------------
1895 .ve
1896 
1897    Thus, any entries in the d locations are stored in the d (diagonal)
1898    submatrix, and any entries in the o locations are stored in the
1899    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1900    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1901 
1902    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1903    plus the diagonal part of the d matrix,
1904    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1905    In general, for PDE problems in which most nonzeros are near the diagonal,
1906    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1907    or you will get TERRIBLE performance; see the users' manual chapter on
1908    matrices.
1909 
1910    Level: intermediate
1911 
1912 .keywords: matrix, block, aij, compressed row, sparse, parallel
1913 
1914 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1915 @*/
1916 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1917 {
1918   PetscErrorCode ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1919 
1920   PetscFunctionBegin;
1921   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1922   if (f) {
1923     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1924   }
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 #undef __FUNCT__
1929 #define __FUNCT__ "MatCreateMPISBAIJ"
1930 /*@C
1931    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1932    (block compressed row).  For good matrix assembly performance
1933    the user should preallocate the matrix storage by setting the parameters
1934    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1935    performance can be increased by more than a factor of 50.
1936 
1937    Collective on MPI_Comm
1938 
1939    Input Parameters:
1940 +  comm - MPI communicator
1941 .  bs   - size of blockk
1942 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1943            This value should be the same as the local size used in creating the
1944            y vector for the matrix-vector product y = Ax.
1945 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1946            This value should be the same as the local size used in creating the
1947            x vector for the matrix-vector product y = Ax.
1948 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1949 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1950 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1951            submatrix  (same for all local rows)
1952 .  d_nnz - array containing the number of block nonzeros in the various block rows
1953            in the upper triangular portion of the in diagonal portion of the local
1954            (possibly different for each block block row) or PETSC_NULL.
1955            You must leave room for the diagonal entry even if it is zero.
1956 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1957            submatrix (same for all local rows).
1958 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1959            off-diagonal portion of the local submatrix (possibly different for
1960            each block row) or PETSC_NULL.
1961 
1962    Output Parameter:
1963 .  A - the matrix
1964 
1965    Options Database Keys:
1966 .   -mat_no_unroll - uses code that does not unroll the loops in the
1967                      block calculations (much slower)
1968 .   -mat_block_size - size of the blocks to use
1969 .   -mat_mpi - use the parallel matrix data structures even on one processor
1970                (defaults to using SeqBAIJ format on one processor)
1971 
1972    Notes:
1973    The user MUST specify either the local or global matrix dimensions
1974    (possibly both).
1975 
1976    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1977    than it must be used on all processors that share the object for that argument.
1978 
1979    Storage Information:
1980    For a square global matrix we define each processor's diagonal portion
1981    to be its local rows and the corresponding columns (a square submatrix);
1982    each processor's off-diagonal portion encompasses the remainder of the
1983    local matrix (a rectangular submatrix).
1984 
1985    The user can specify preallocated storage for the diagonal part of
1986    the local submatrix with either d_nz or d_nnz (not both).  Set
1987    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1988    memory allocation.  Likewise, specify preallocated storage for the
1989    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1990 
1991    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1992    the figure below we depict these three local rows and all columns (0-11).
1993 
1994 .vb
1995            0 1 2 3 4 5 6 7 8 9 10 11
1996           -------------------
1997    row 3  |  o o o d d d o o o o o o
1998    row 4  |  o o o d d d o o o o o o
1999    row 5  |  o o o d d d o o o o o o
2000           -------------------
2001 .ve
2002 
2003    Thus, any entries in the d locations are stored in the d (diagonal)
2004    submatrix, and any entries in the o locations are stored in the
2005    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2006    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2007 
2008    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2009    plus the diagonal part of the d matrix,
2010    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2011    In general, for PDE problems in which most nonzeros are near the diagonal,
2012    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2013    or you will get TERRIBLE performance; see the users' manual chapter on
2014    matrices.
2015 
2016    Level: intermediate
2017 
2018 .keywords: matrix, block, aij, compressed row, sparse, parallel
2019 
2020 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2021 @*/
2022 
2023 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2024 {
2025   PetscErrorCode ierr,size;
2026 
2027   PetscFunctionBegin;
2028   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2029   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2030   if (size > 1) {
2031     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2032     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2033   } else {
2034     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2035     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2036   }
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 
2041 #undef __FUNCT__
2042 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2043 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2044 {
2045   Mat          mat;
2046   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2047   PetscErrorCode ierr;
2048   int len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs;
2049   PetscScalar  *array;
2050 
2051   PetscFunctionBegin;
2052   *newmat       = 0;
2053   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2054   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2055   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2056 
2057   mat->factor       = matin->factor;
2058   mat->preallocated = PETSC_TRUE;
2059   mat->assembled    = PETSC_TRUE;
2060   mat->insertmode   = NOT_SET_VALUES;
2061 
2062   a = (Mat_MPISBAIJ*)mat->data;
2063   a->bs  = oldmat->bs;
2064   a->bs2 = oldmat->bs2;
2065   a->mbs = oldmat->mbs;
2066   a->nbs = oldmat->nbs;
2067   a->Mbs = oldmat->Mbs;
2068   a->Nbs = oldmat->Nbs;
2069 
2070   a->rstart       = oldmat->rstart;
2071   a->rend         = oldmat->rend;
2072   a->cstart       = oldmat->cstart;
2073   a->cend         = oldmat->cend;
2074   a->size         = oldmat->size;
2075   a->rank         = oldmat->rank;
2076   a->donotstash   = oldmat->donotstash;
2077   a->roworiented  = oldmat->roworiented;
2078   a->rowindices   = 0;
2079   a->rowvalues    = 0;
2080   a->getrowactive = PETSC_FALSE;
2081   a->barray       = 0;
2082   a->rstart_bs    = oldmat->rstart_bs;
2083   a->rend_bs      = oldmat->rend_bs;
2084   a->cstart_bs    = oldmat->cstart_bs;
2085   a->cend_bs      = oldmat->cend_bs;
2086 
2087   /* hash table stuff */
2088   a->ht           = 0;
2089   a->hd           = 0;
2090   a->ht_size      = 0;
2091   a->ht_flag      = oldmat->ht_flag;
2092   a->ht_fact      = oldmat->ht_fact;
2093   a->ht_total_ct  = 0;
2094   a->ht_insert_ct = 0;
2095 
2096   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2097   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2098   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2099   if (oldmat->colmap) {
2100 #if defined (PETSC_USE_CTABLE)
2101     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2102 #else
2103     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2104     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2105     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2106 #endif
2107   } else a->colmap = 0;
2108 
2109   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2110     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2111     PetscLogObjectMemory(mat,len*sizeof(int));
2112     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2113   } else a->garray = 0;
2114 
2115   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2116   PetscLogObjectParent(mat,a->lvec);
2117   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2118   PetscLogObjectParent(mat,a->Mvctx);
2119 
2120   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2121   PetscLogObjectParent(mat,a->slvec0);
2122   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2123   PetscLogObjectParent(mat,a->slvec1);
2124 
2125   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2126   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2127   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2128   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2129   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2130   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2131   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2132   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2133   PetscLogObjectParent(mat,a->slvec0);
2134   PetscLogObjectParent(mat,a->slvec1);
2135   PetscLogObjectParent(mat,a->slvec0b);
2136   PetscLogObjectParent(mat,a->slvec1a);
2137   PetscLogObjectParent(mat,a->slvec1b);
2138 
2139   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2140   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2141   a->sMvctx = oldmat->sMvctx;
2142   PetscLogObjectParent(mat,a->sMvctx);
2143 
2144   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2145   PetscLogObjectParent(mat,a->A);
2146   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2147   PetscLogObjectParent(mat,a->B);
2148   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2149   *newmat = mat;
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 #include "petscsys.h"
2154 
2155 #undef __FUNCT__
2156 #define __FUNCT__ "MatLoad_MPISBAIJ"
2157 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2158 {
2159   Mat          A;
2160   int          i,nz,ierr,j,rstart,rend,fd;
2161   PetscScalar  *vals,*buf;
2162   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2163   MPI_Status   status;
2164   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2165   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2166   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2167   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2168   int          dcount,kmax,k,nzcount,tmp;
2169 
2170   PetscFunctionBegin;
2171   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2172 
2173   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2174   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2175   if (!rank) {
2176     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2177     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2178     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2179     if (header[3] < 0) {
2180       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2181     }
2182   }
2183 
2184   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2185   M = header[1]; N = header[2];
2186 
2187   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2188 
2189   /*
2190      This code adds extra rows to make sure the number of rows is
2191      divisible by the blocksize
2192   */
2193   Mbs        = M/bs;
2194   extra_rows = bs - M + bs*(Mbs);
2195   if (extra_rows == bs) extra_rows = 0;
2196   else                  Mbs++;
2197   if (extra_rows &&!rank) {
2198     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2199   }
2200 
2201   /* determine ownership of all rows */
2202   mbs        = Mbs/size + ((Mbs % size) > rank);
2203   m          = mbs*bs;
2204   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2205   browners   = rowners + size + 1;
2206   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2207   rowners[0] = 0;
2208   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2209   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2210   rstart = rowners[rank];
2211   rend   = rowners[rank+1];
2212 
2213   /* distribute row lengths to all processors */
2214   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2215   if (!rank) {
2216     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2217     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2218     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2219     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2220     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2221     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2222     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2223   } else {
2224     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2225   }
2226 
2227   if (!rank) {   /* procs[0] */
2228     /* calculate the number of nonzeros on each processor */
2229     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2230     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2231     for (i=0; i<size; i++) {
2232       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2233         procsnz[i] += rowlengths[j];
2234       }
2235     }
2236     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2237 
2238     /* determine max buffer needed and allocate it */
2239     maxnz = 0;
2240     for (i=0; i<size; i++) {
2241       maxnz = PetscMax(maxnz,procsnz[i]);
2242     }
2243     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2244 
2245     /* read in my part of the matrix column indices  */
2246     nz     = procsnz[0];
2247     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2248     mycols = ibuf;
2249     if (size == 1)  nz -= extra_rows;
2250     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2251     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2252 
2253     /* read in every ones (except the last) and ship off */
2254     for (i=1; i<size-1; i++) {
2255       nz   = procsnz[i];
2256       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2257       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2258     }
2259     /* read in the stuff for the last proc */
2260     if (size != 1) {
2261       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2262       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2263       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2264       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2265     }
2266     ierr = PetscFree(cols);CHKERRQ(ierr);
2267   } else {  /* procs[i], i>0 */
2268     /* determine buffer space needed for message */
2269     nz = 0;
2270     for (i=0; i<m; i++) {
2271       nz += locrowlens[i];
2272     }
2273     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2274     mycols = ibuf;
2275     /* receive message of column indices*/
2276     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2277     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2278     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2279   }
2280 
2281   /* loop over local rows, determining number of off diagonal entries */
2282   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2283   odlens   = dlens + (rend-rstart);
2284   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2285   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2286   masked1  = mask    + Mbs;
2287   masked2  = masked1 + Mbs;
2288   rowcount = 0; nzcount = 0;
2289   for (i=0; i<mbs; i++) {
2290     dcount  = 0;
2291     odcount = 0;
2292     for (j=0; j<bs; j++) {
2293       kmax = locrowlens[rowcount];
2294       for (k=0; k<kmax; k++) {
2295         tmp = mycols[nzcount++]/bs; /* block col. index */
2296         if (!mask[tmp]) {
2297           mask[tmp] = 1;
2298           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2299           else masked1[dcount++] = tmp; /* entry in diag portion */
2300         }
2301       }
2302       rowcount++;
2303     }
2304 
2305     dlens[i]  = dcount;  /* d_nzz[i] */
2306     odlens[i] = odcount; /* o_nzz[i] */
2307 
2308     /* zero out the mask elements we set */
2309     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2310     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2311   }
2312 
2313   /* create our matrix */
2314   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2315   ierr = MatSetType(A,type);CHKERRQ(ierr);
2316   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2317   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2318 
2319   if (!rank) {
2320     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2321     /* read in my part of the matrix numerical values  */
2322     nz = procsnz[0];
2323     vals = buf;
2324     mycols = ibuf;
2325     if (size == 1)  nz -= extra_rows;
2326     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2327     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2328 
2329     /* insert into matrix */
2330     jj      = rstart*bs;
2331     for (i=0; i<m; i++) {
2332       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2333       mycols += locrowlens[i];
2334       vals   += locrowlens[i];
2335       jj++;
2336     }
2337 
2338     /* read in other processors (except the last one) and ship out */
2339     for (i=1; i<size-1; i++) {
2340       nz   = procsnz[i];
2341       vals = buf;
2342       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2343       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2344     }
2345     /* the last proc */
2346     if (size != 1){
2347       nz   = procsnz[i] - extra_rows;
2348       vals = buf;
2349       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2350       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2351       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2352     }
2353     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2354 
2355   } else {
2356     /* receive numeric values */
2357     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2358 
2359     /* receive message of values*/
2360     vals   = buf;
2361     mycols = ibuf;
2362     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2363     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2364     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2365 
2366     /* insert into matrix */
2367     jj      = rstart*bs;
2368     for (i=0; i<m; i++) {
2369       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2370       mycols += locrowlens[i];
2371       vals   += locrowlens[i];
2372       jj++;
2373     }
2374   }
2375 
2376   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2377   ierr = PetscFree(buf);CHKERRQ(ierr);
2378   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2379   ierr = PetscFree(rowners);CHKERRQ(ierr);
2380   ierr = PetscFree(dlens);CHKERRQ(ierr);
2381   ierr = PetscFree(mask);CHKERRQ(ierr);
2382   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2383   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2384   *newmat = A;
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2390 /*@
2391    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2392 
2393    Input Parameters:
2394 .  mat  - the matrix
2395 .  fact - factor
2396 
2397    Collective on Mat
2398 
2399    Level: advanced
2400 
2401   Notes:
2402    This can also be set by the command line option: -mat_use_hash_table fact
2403 
2404 .keywords: matrix, hashtable, factor, HT
2405 
2406 .seealso: MatSetOption()
2407 @*/
2408 PetscErrorCode MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2409 {
2410   PetscFunctionBegin;
2411   SETERRQ(1,"Function not yet written for SBAIJ format");
2412   /* PetscFunctionReturn(0); */
2413 }
2414 
2415 #undef __FUNCT__
2416 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2417 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2418 {
2419   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2420   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2421   PetscReal    atmp;
2422   PetscReal    *work,*svalues,*rvalues;
2423   PetscErrorCode ierr;
2424   int i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2425   int          rank,size,*rowners_bs,dest,count,source;
2426   PetscScalar  *va;
2427   MatScalar    *ba;
2428   MPI_Status   stat;
2429 
2430   PetscFunctionBegin;
2431   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2432   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2433 
2434   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2435   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2436 
2437   bs   = a->bs;
2438   mbs  = a->mbs;
2439   Mbs  = a->Mbs;
2440   ba   = b->a;
2441   bi   = b->i;
2442   bj   = b->j;
2443   /*
2444   PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2445   PetscSynchronizedFlush(A->comm);
2446   */
2447 
2448   /* find ownerships */
2449   rowners_bs = a->rowners_bs;
2450   /*
2451   if (!rank){
2452     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2453   }
2454   */
2455 
2456   /* each proc creates an array to be distributed */
2457   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2458   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2459 
2460   /* row_max for B */
2461   if (rank != size-1){
2462     for (i=0; i<mbs; i++) {
2463       ncols = bi[1] - bi[0]; bi++;
2464       brow  = bs*i;
2465       for (j=0; j<ncols; j++){
2466         bcol = bs*(*bj);
2467         for (kcol=0; kcol<bs; kcol++){
2468           col = bcol + kcol;                 /* local col index */
2469           col += rowners_bs[rank+1];      /* global col index */
2470           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2471           for (krow=0; krow<bs; krow++){
2472             atmp = PetscAbsScalar(*ba); ba++;
2473             row = brow + krow;    /* local row index */
2474             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2475             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2476             if (work[col] < atmp) work[col] = atmp;
2477           }
2478         }
2479         bj++;
2480       }
2481     }
2482     /*
2483       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2484       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2485       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2486       */
2487 
2488     /* send values to its owners */
2489     for (dest=rank+1; dest<size; dest++){
2490       svalues = work + rowners_bs[dest];
2491       count   = rowners_bs[dest+1]-rowners_bs[dest];
2492       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2493       /*
2494       PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2495       PetscSynchronizedFlush(A->comm);
2496       */
2497     }
2498   }
2499 
2500   /* receive values */
2501   if (rank){
2502     rvalues = work;
2503     count   = rowners_bs[rank+1]-rowners_bs[rank];
2504     for (source=0; source<rank; source++){
2505       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2506       /* process values */
2507       for (i=0; i<count; i++){
2508         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2509       }
2510       /*
2511       PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2512       PetscSynchronizedFlush(A->comm);
2513       */
2514     }
2515   }
2516 
2517   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2518   ierr = PetscFree(work);CHKERRQ(ierr);
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatRelax_MPISBAIJ"
2524 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2525 {
2526   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2527   PetscErrorCode ierr;
2528   int mbs=mat->mbs,bs=mat->bs;
2529   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2530   Vec            bb1;
2531 
2532   PetscFunctionBegin;
2533   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2534   if (bs > 1)
2535     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2536 
2537   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2538     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2539       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2540       its--;
2541     }
2542 
2543     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2544     while (its--){
2545 
2546       /* lower triangular part: slvec0b = - B^T*xx */
2547       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2548 
2549       /* copy xx into slvec0a */
2550       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2551       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2552       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2553       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2554 
2555       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2556 
2557       /* copy bb into slvec1a */
2558       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2559       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2560       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2561       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2562 
2563       /* set slvec1b = 0 */
2564       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2565 
2566       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2567       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2568       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2569       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2570 
2571       /* upper triangular part: bb1 = bb1 - B*x */
2572       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2573 
2574       /* local diagonal sweep */
2575       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2576     }
2577     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2578   } else {
2579     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2580   }
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 #undef __FUNCT__
2585 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2586 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2587 {
2588   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2589   PetscErrorCode ierr;
2590   PetscScalar    mone=-1.0;
2591   Vec            lvec1,bb1;
2592 
2593   PetscFunctionBegin;
2594   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2595   if (mat->bs > 1)
2596     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2597 
2598   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2599     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2600       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2601       its--;
2602     }
2603 
2604     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2605     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2606     while (its--){
2607       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2608 
2609       /* lower diagonal part: bb1 = bb - B^T*xx */
2610       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2611       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2612 
2613       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2614       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2615       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2616 
2617       /* upper diagonal part: bb1 = bb1 - B*x */
2618       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2619       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2620 
2621       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2622 
2623       /* diagonal sweep */
2624       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2625     }
2626     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2627     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2628   } else {
2629     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2630   }
2631   PetscFunctionReturn(0);
2632 }
2633 
2634