/*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/

#include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "src/vec/vecimpl.h"
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

extern int MatSetUpMultiply_MPISBAIJ(Mat);
extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
extern int DisAssemble_MPISBAIJ(Mat);
extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
extern int MatPrintHelp_SeqSBAIJ(Mat);
extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single-precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries to single precision and then calls ..._MatScalar() to put them
   into the single-precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
int MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
int MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPISBAIJ_Private"
static int CreateColmap_MPISBAIJ_Private(Mat mat)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
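
/*
   Not yet implemented above; for orientation, a minimal sketch of what this
   routine would do, modeled on the MPIBAIJ analogue (B->nbs below is an
   assumption taken from that analogue, not code from this file): allocate
   one slot per global block column, then record each off-diagonal block
   column of B, shifted by one so that 0 can mean "not present".
*/
#if 0 /* illustration only, never compiled */
static int CreateColmap_MPISBAIJ_Sketch(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ  *B    = (Mat_SeqBAIJ*)baij->B->data;
  int          ierr,i;

  PetscFunctionBegin;
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<B->nbs; i++) baij->colmap[baij->garray[i]] = i*baij->bs + 1; /* 1-based */
  PetscFunctionReturn(0);
}
#endif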

#define CHUNKSIZE  10

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (int*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->s_maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->s_nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
#ifndef MatSetValues_SeqSBAIJ_B_Private
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (int*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif
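
/*
   Both macros above locate a block column with the same two-phase search: a
   binary search that stops once the window is small (high-low <= 3), finished
   off by a linear scan; for the short block rows typical here this beats
   bisecting down to a single element.  A self-contained sketch of the idea
   (FindBlockCol is a hypothetical helper for illustration, not PETSc API).
*/
#if 0 /* illustration only, never compiled */
static int FindBlockCol(const int *rp,int nrow,int bcol)
{
  int low = 0,high = nrow,t,i;
  while (high-low > 3) {          /* bisect while the window is large */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {      /* finish with a linear scan */
    if (rp[i] == bcol) return i;  /* existing block column found */
    if (rp[i] > bcol) break;
  }
  return -(i+1);                  /* not found: encode the insertion point */
}
#endif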

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
  int          ierr,i,N = m*n;
  MatScalar    *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
  int          ierr,i,N = m*n*b->bs2;
  MatScalar    *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar    value;
  PetscTruth   roworiented = baij->roworiented;
  int          ierr,i,j,row,col;
  int          rstart_orig=baij->rstart_bs;
  int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int          cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some variables required in the macro */
  Mat          A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
  int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa=a->a;

  Mat          B = baij->B;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
  int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar    *ba=b->a;

  int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int          low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar    *ap,*bap;

  /* for stash */
  int          n_loc, *in_loc=0;
  MatScalar    *v_loc=0;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }

  if (!baij->donotstash) {
    ierr = PetscFree(in_loc);CHKERRQ(ierr);
    ierr = PetscFree(v_loc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
  int             rend=baij->rend,cstart=baij->cstart,stepval;
  int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
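
/*
   HASH() is multiplicative hashing: HASH_KEY is the fractional part of the
   golden ratio, so the fractional part of key*HASH_KEY is well spread over
   [0,1) and is then scaled into a slot index in [0,size).  A minimal usage
   sketch (HashDemo is a hypothetical helper for illustration, not PETSc API).
*/
#if 0 /* illustration only, never compiled */
static int HashDemo(int size,int key)
{
  PetscReal tmp;
  return HASH(size,key,tmp); /* e.g. HASH(97,12345,tmp) yields a slot in 0..96 */
}
#endif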
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
  /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
  int          ierr;
  PetscReal    sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;             /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      /*
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
      */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      /*
      PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
      PetscSynchronizedFlush(mat->comm); */

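      /* Off-diagonal blocks are stored only once in the symmetric format but
         contribute twice to the Frobenius norm,
            ||A||_F^2 = ||A_diag||_F^2 + 2*||B||_F^2,
         hence the factor 2 on sum[1] below. */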
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates and sets the hash table. The table is created only once;
  if new entries need to be added to the matrix, the hash table has
  to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr,nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ  *b    = (Mat_SeqBAIJ*)baij->B->data;
  int          i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int          *row,*col,other_disassembled;
  PetscTruth   r1,r2,r3;
  MatScalar    *val;
  InsertMode   addv = mat->insertmode;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      /*
      PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
      PetscSynchronizedFlush(mat->comm);
      */
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flags to PETSC_FALSE, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    int          M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    if (!rank) {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
int MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,nt,mbs=a->mbs,bs=a->bs;
  PetscScalar  *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," _1comm is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
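  /*
     One-communication symmetric multiply: only the upper triangular blocks
     are stored, so y = A*x is assembled from three pieces:
        slvec1a = A_diag * x           (local diagonal block)
        slvec0  = [ x ; B^T x ]        (data this process sends)
        slvec1  = [ y_add ; x_ghost ]  (data this process receives)
     A single forward scatter with ADD_VALUES both accumulates the remote
     subdiagonal contributions into slvec1a and gathers the ghost values of x
     needed for the superdiagonal product B*slvec1b below.
  */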
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

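  /* Two-communication variant: a forward scatter gathers the ghost values of
     x for the superdiagonal product, and a reverse ADD_VALUES scatter returns
     the subdiagonal contributions B^T*x to their owning processes. */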
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr,mbs=a->mbs,bs=a->bs;
  PetscScalar  *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPISBAIJ"
int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMult().");
  /* PetscFunctionReturn(0); */
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
  /* PetscFunctionReturn(0); */
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int          *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    int          max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
int MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
  Mat          A = a->A,B = a->B;
  int          ierr;
  PetscReal    isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = (PetscReal)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
int MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  int          ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1341     break;
1342   case MAT_COLUMN_ORIENTED:
1343     a->roworiented = PETSC_FALSE;
1344     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1345     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1346     break;
1347   case MAT_IGNORE_OFF_PROC_ENTRIES:
1348     a->donotstash = PETSC_TRUE;
1349     break;
1350   case MAT_NO_NEW_DIAGONALS:
1351     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1352   case MAT_USE_HASH_TABLE:
1353     a->ht_flag = PETSC_TRUE;
1354     break;
1355   case MAT_NOT_SYMMETRIC:
1356   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1357   case MAT_HERMITIAN:
1358     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1359   case MAT_SYMMETRIC:
1360   case MAT_STRUCTURALLY_SYMMETRIC:
1361   case MAT_NOT_HERMITIAN:
1362   case MAT_SYMMETRY_ETERNAL:
1363   case MAT_NOT_SYMMETRY_ETERNAL:
1364     break;
1365   default:
1366     SETERRQ(PETSC_ERR_SUP,"unknown option");
1367   }
1368   PetscFunctionReturn(0);
1369 }
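/*
   Editorial usage sketch: the options handled above are set through the
   public MatSetOption() interface, e.g. enabling hash-table assembly and
   discarding off-process entries (A is a placeholder).

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);
*/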
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1373 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1374 {
1375   int ierr;
1376   PetscFunctionBegin;
1377   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1383 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1384 {
1385   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1386   Mat         a = baij->A,b = baij->B;
1387   int         ierr,s1,s2,s3;
1388 
1389   PetscFunctionBegin;
1390   if (ll != rr) {
1391     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For the symmetric format, the left and right scaling vectors must be the same");
1392   }
1393   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1394   if (rr) {
1395     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1396     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1397     /* Overlap communication with computation: start scattering rr to the ghost vector */
1398     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1399 
1400     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1401     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1402     /* left-scale the off-diagonal block while the scatter is in flight */
1403     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1404 
1405     /* scale the diagonal block */
1406     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1407 
1408     /* complete the scatter, then right-scale the off-diagonal block */
1409     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1410     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1411   }
1412 
1413   PetscFunctionReturn(0);
1414 }
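/*
   Editorial usage sketch: because the format is symmetric, the same vector
   must be passed on both sides, i.e. A <- diag(d) A diag(d); A and d are
   placeholders.

     ierr = MatDiagonalScale(A,d,d);CHKERRQ(ierr);
*/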
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1418 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1419 {
1420   PetscFunctionBegin;
1421   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1426 int MatPrintHelp_MPISBAIJ(Mat A)
1427 {
1428   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1429   MPI_Comm    comm = A->comm;
1430   static int  called = 0;
1431   int         ierr;
1432 
1433   PetscFunctionBegin;
1434   if (!a->rank) {
1435     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1436   }
1437   if (called) {PetscFunctionReturn(0);} else called = 1;
1438   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1439   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1440   PetscFunctionReturn(0);
1441 }
1442 
1443 #undef __FUNCT__
1444 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1445 int MatSetUnfactored_MPISBAIJ(Mat A)
1446 {
1447   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1448   int         ierr;
1449 
1450   PetscFunctionBegin;
1451   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatEqual_MPISBAIJ"
1459 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1460 {
1461   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1462   Mat         a,b,c,d;
1463   PetscTruth  flg;
1464   int         ierr;
1465 
1466   PetscFunctionBegin;
1467   a = matA->A; b = matA->B;
1468   c = matB->A; d = matB->B;
1469 
1470   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1471   if (flg == PETSC_TRUE) {
1472     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1473   }
1474   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1480 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1481 {
1482   int        ierr;
1483 
1484   PetscFunctionBegin;
1485   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,PETSC_NULL,PETSC_DEFAULT,PETSC_NULL);CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 /* -------------------------------------------------------------------*/
1489 static struct _MatOps MatOps_Values = {
1490        MatSetValues_MPISBAIJ,
1491        MatGetRow_MPISBAIJ,
1492        MatRestoreRow_MPISBAIJ,
1493        MatMult_MPISBAIJ,
1494 /* 4*/ MatMultAdd_MPISBAIJ,
1495        MatMultTranspose_MPISBAIJ,
1496        MatMultTransposeAdd_MPISBAIJ,
1497        0,
1498        0,
1499        0,
1500 /*10*/ 0,
1501        0,
1502        0,
1503        MatRelax_MPISBAIJ,
1504        MatTranspose_MPISBAIJ,
1505 /*15*/ MatGetInfo_MPISBAIJ,
1506        MatEqual_MPISBAIJ,
1507        MatGetDiagonal_MPISBAIJ,
1508        MatDiagonalScale_MPISBAIJ,
1509        MatNorm_MPISBAIJ,
1510 /*20*/ MatAssemblyBegin_MPISBAIJ,
1511        MatAssemblyEnd_MPISBAIJ,
1512        0,
1513        MatSetOption_MPISBAIJ,
1514        MatZeroEntries_MPISBAIJ,
1515 /*25*/ MatZeroRows_MPISBAIJ,
1516        0,
1517        0,
1518        0,
1519        0,
1520 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1521        0,
1522        0,
1523        0,
1524        0,
1525 /*35*/ MatDuplicate_MPISBAIJ,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*40*/ 0,
1531        0,
1532        0,
1533        MatGetValues_MPISBAIJ,
1534        0,
1535 /*45*/ MatPrintHelp_MPISBAIJ,
1536        MatScale_MPISBAIJ,
1537        0,
1538        0,
1539        0,
1540 /*50*/ MatGetBlockSize_MPISBAIJ,
1541        0,
1542        0,
1543        0,
1544        0,
1545 /*55*/ 0,
1546        0,
1547        MatSetUnfactored_MPISBAIJ,
1548        0,
1549        MatSetValuesBlocked_MPISBAIJ,
1550 /*60*/ 0,
1551        0,
1552        0,
1553        MatGetPetscMaps_Petsc,
1554        0,
1555 /*65*/ 0,
1556        0,
1557        0,
1558        0,
1559        0,
1560 /*70*/ MatGetRowMax_MPISBAIJ,
1561        0,
1562        0,
1563        0,
1564        0,
1565 /*75*/ 0,
1566        0,
1567        0,
1568        0,
1569        0,
1570 /*80*/ 0,
1571        0,
1572        0,
1573        0,
1574        0,
1575 /*85*/ MatLoad_MPISBAIJ
1576 };
1577 
1578 
1579 EXTERN_C_BEGIN
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1582 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1583 {
1584   PetscFunctionBegin;
1585   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1586   *iscopy = PETSC_FALSE;
1587   PetscFunctionReturn(0);
1588 }
1589 EXTERN_C_END
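/*
   Editorial usage sketch: the routine above is reached through the composed
   function "MatGetDiagonalBlock_C", mirroring the query pattern used for
   MatMPISBAIJSetPreallocation() later in this file; A and Ad are placeholders.

     int        (*f)(Mat,PetscTruth*,MatReuse,Mat*);
     PetscTruth iscopy;
     Mat        Ad;
     ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) { ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr); }
*/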
1590 
1591 EXTERN_C_BEGIN
1592 #undef __FUNCT__
1593 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1594 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1595 {
1596   Mat_MPISBAIJ *b;
1597   int          ierr,i,mbs,Mbs;
1598 
1599   PetscFunctionBegin;
1600   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1601 
1602   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1603   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1604   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1605   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1606   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1607   if (d_nnz) {
1608     for (i=0; i<B->m/bs; i++) {
1609       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1610     }
1611   }
1612   if (o_nnz) {
1613     for (i=0; i<B->m/bs; i++) {
1614       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1615     }
1616   }
1617   B->preallocated = PETSC_TRUE;
1618   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1619   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1620   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1621   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1622 
1623   b   = (Mat_MPISBAIJ*)B->data;
1624   mbs = B->m/bs;
1625   Mbs = B->M/bs;
1626   if (mbs*bs != B->m) {
1627     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by blocksize %d",B->m,bs);
1628   }
1629 
1630   b->bs  = bs;
1631   b->bs2 = bs*bs;
1632   b->mbs = mbs;
1633   b->nbs = mbs;
1634   b->Mbs = Mbs;
1635   b->Nbs = Mbs;
1636 
1637   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1638   b->rowners[0]    = 0;
1639   for (i=2; i<=b->size; i++) {
1640     b->rowners[i] += b->rowners[i-1];
1641   }
1642   b->rstart    = b->rowners[b->rank];
1643   b->rend      = b->rowners[b->rank+1];
1644   b->cstart    = b->rstart;
1645   b->cend      = b->rend;
1646   for (i=0; i<=b->size; i++) {
1647     b->rowners_bs[i] = b->rowners[i]*bs;
1648   }
1649   b->rstart_bs = b->rstart*bs;
1650   b->rend_bs   = b->rend*bs;
1651 
1652   b->cstart_bs = b->cstart*bs;
1653   b->cend_bs   = b->cend*bs;
1654 
1655 
1656   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1657   PetscLogObjectParent(B,b->A);
1658   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1659   PetscLogObjectParent(B,b->B);
1660 
1661   /* build cache for off array entries formed */
1662   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1663 
1664   PetscFunctionReturn(0);
1665 }
1666 EXTERN_C_END
1667 
1668 /*MC
1669    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1670    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1671 
1672    Options Database Keys:
1673 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1674 
1675   Level: beginner
1676 
1677 .seealso: MatCreateMPISBAIJ()
1678 M*/
1679 
1680 EXTERN_C_BEGIN
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatCreate_MPISBAIJ"
1683 int MatCreate_MPISBAIJ(Mat B)
1684 {
1685   Mat_MPISBAIJ *b;
1686   int          ierr;
1687   PetscTruth   flg;
1688 
1689   PetscFunctionBegin;
1690 
1691   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1692   B->data = (void*)b;
1693   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1694   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1695 
1696   B->ops->destroy    = MatDestroy_MPISBAIJ;
1697   B->ops->view       = MatView_MPISBAIJ;
1698   B->mapping    = 0;
1699   B->factor     = 0;
1700   B->assembled  = PETSC_FALSE;
1701 
1702   B->insertmode = NOT_SET_VALUES;
1703   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1704   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1705 
1706   /* build local table of row and column ownerships */
1707   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1708   b->cowners    = b->rowners + b->size + 2;
1709   b->rowners_bs = b->cowners + b->size + 2;
1710   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1711 
1712   /* build cache for off array entries formed */
1713   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1714   b->donotstash  = PETSC_FALSE;
1715   b->colmap      = PETSC_NULL;
1716   b->garray      = PETSC_NULL;
1717   b->roworiented = PETSC_TRUE;
1718 
1719 #if defined(PETSC_USE_MAT_SINGLE)
1720   /* stuff for MatSetValues_XXX in single precision */
1721   b->setvalueslen     = 0;
1722   b->setvaluescopy    = PETSC_NULL;
1723 #endif
1724 
1725   /* stuff used in block assembly */
1726   b->barray       = 0;
1727 
1728   /* stuff used for matrix vector multiply */
1729   b->lvec         = 0;
1730   b->Mvctx        = 0;
1731   b->slvec0       = 0;
1732   b->slvec0b      = 0;
1733   b->slvec1       = 0;
1734   b->slvec1a      = 0;
1735   b->slvec1b      = 0;
1736   b->sMvctx       = 0;
1737 
1738   /* stuff for MatGetRow() */
1739   b->rowindices   = 0;
1740   b->rowvalues    = 0;
1741   b->getrowactive = PETSC_FALSE;
1742 
1743   /* hash table stuff */
1744   b->ht           = 0;
1745   b->hd           = 0;
1746   b->ht_size      = 0;
1747   b->ht_flag      = PETSC_FALSE;
1748   b->ht_fact      = 0;
1749   b->ht_total_ct  = 0;
1750   b->ht_insert_ct = 0;
1751 
1752   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1753   if (flg) {
1754     PetscReal fact = 1.39;
1755     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1756     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1757     if (fact <= 1.0) fact = 1.39;
1758     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1759     PetscLogInfo(0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact);
1760   }
1761   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1762                                      "MatStoreValues_MPISBAIJ",
1763                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1764   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1765                                      "MatRetrieveValues_MPISBAIJ",
1766                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1767   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1768                                      "MatGetDiagonalBlock_MPISBAIJ",
1769                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1770   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1771                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1772                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1773   PetscFunctionReturn(0);
1774 }
1775 EXTERN_C_END
1776 
1777 /*MC
1778    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1779 
1780    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1781    and MATMPISBAIJ otherwise.
1782 
1783    Options Database Keys:
1784 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1785 
1786   Level: beginner
1787 
1788 .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
1789 M*/
1790 
1791 EXTERN_C_BEGIN
1792 #undef __FUNCT__
1793 #define __FUNCT__ "MatCreate_SBAIJ"
1794 int MatCreate_SBAIJ(Mat A) {
1795   int ierr,size;
1796 
1797   PetscFunctionBegin;
1798   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1799   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1800   if (size == 1) {
1801     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1802   } else {
1803     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1804   }
1805   PetscFunctionReturn(0);
1806 }
1807 EXTERN_C_END
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1811 /*@C
1812    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1813    the user should preallocate the matrix storage by setting the parameters
1814    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1815    performance can be increased by more than a factor of 50.
1816 
1817    Collective on Mat
1818 
1819    Input Parameters:
1820 +  A - the matrix
1821 .  bs   - size of the blocks
1822 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1823            submatrix  (same for all local rows)
1824 .  d_nnz - array containing the number of block nonzeros in the various block rows
1825            in the upper triangular and diagonal part of the diagonal portion of the
1826            local submatrix (possibly different for each block row) or PETSC_NULL.
1827            You must leave room for the diagonal entry even if it is zero.
1828 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1829            submatrix (same for all local rows).
1830 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1831            off-diagonal portion of the local submatrix (possibly different for
1832            each block row) or PETSC_NULL.
1833 
1834 
1835    Options Database Keys:
1836 +   -mat_no_unroll - uses code that does not unroll the loops in the
1837                      block calculations (much slower)
1838 -   -mat_block_size - size of the blocks to use
1839 
1840    Notes:
1841 
1842    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1843    then it must be used on all processors that share the object for that argument.
1844 
1845    Storage Information:
1846    For a square global matrix we define each processor's diagonal portion
1847    to be its local rows and the corresponding columns (a square submatrix);
1848    each processor's off-diagonal portion encompasses the remainder of the
1849    local matrix (a rectangular submatrix).
1850 
1851    The user can specify preallocated storage for the diagonal part of
1852    the local submatrix with either d_nz or d_nnz (not both).  Set
1853    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1854    memory allocation.  Likewise, specify preallocated storage for the
1855    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1856 
1857    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1858    the figure below we depict these three local rows and all columns (0-11).
1859 
1860 .vb
1861            0 1 2 3 4 5 6 7 8 9 10 11
1862           -------------------
1863    row 3  |  o o o d d d o o o o o o
1864    row 4  |  o o o d d d o o o o o o
1865    row 5  |  o o o d d d o o o o o o
1866           -------------------
1867 .ve
1868 
1869    Thus, any entries in the d locations are stored in the d (diagonal)
1870    submatrix, and any entries in the o locations are stored in the
1871    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1872    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1873 
1874    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1875    plus the diagonal part of the d matrix,
1876    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1877    In general, for PDE problems in which most nonzeros are near the diagonal,
1878    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1879    or you will get TERRIBLE performance; see the users' manual chapter on
1880    matrices.
1881 
1882    Level: intermediate
1883 
1884 .keywords: matrix, block, aij, compressed row, sparse, parallel
1885 
1886 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1887 @*/
1888 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1889 {
1890   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1891 
1892   PetscFunctionBegin;
1893   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1894   if (f) {
1895     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1896   }
1897   PetscFunctionReturn(0);
1898 }
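/*
   Editorial usage sketch: preallocating a distributed symmetric matrix with
   constant per-block-row estimates.  The sizes m, M, the block size bs, and
   the estimates 3 and 1 are illustrative placeholders only.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,m,m,M,M,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
*/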
1899 
1900 #undef __FUNCT__
1901 #define __FUNCT__ "MatCreateMPISBAIJ"
1902 /*@C
1903    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1904    (block compressed row).  For good matrix assembly performance
1905    the user should preallocate the matrix storage by setting the parameters
1906    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1907    performance can be increased by more than a factor of 50.
1908 
1909    Collective on MPI_Comm
1910 
1911    Input Parameters:
1912 +  comm - MPI communicator
1913 .  bs   - size of the blocks
1914 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1915            This value should be the same as the local size used in creating the
1916            y vector for the matrix-vector product y = Ax.
1917 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1918            This value should be the same as the local size used in creating the
1919            x vector for the matrix-vector product y = Ax.
1920 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1921 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1922 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1923            submatrix  (same for all local rows)
1924 .  d_nnz - array containing the number of block nonzeros in the various block rows
1925            in the upper triangular and diagonal part of the diagonal portion of the
1926            local submatrix (possibly different for each block row) or PETSC_NULL.
1927            You must leave room for the diagonal entry even if it is zero.
1928 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1929            submatrix (same for all local rows).
1930 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1931            off-diagonal portion of the local submatrix (possibly different for
1932            each block row) or PETSC_NULL.
1933 
1934    Output Parameter:
1935 .  A - the matrix
1936 
1937    Options Database Keys:
1938 +   -mat_no_unroll - uses code that does not unroll the loops in the
1939                      block calculations (much slower)
1940 .   -mat_block_size - size of the blocks to use
1941 -   -mat_mpi - use the parallel matrix data structures even on one processor
1942                (defaults to using SeqSBAIJ format on one processor)
1943 
1944    Notes:
1945    The user MUST specify either the local or global matrix dimensions
1946    (possibly both).
1947 
1948    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1949    then it must be used on all processors that share the object for that argument.
1950 
1951    Storage Information:
1952    For a square global matrix we define each processor's diagonal portion
1953    to be its local rows and the corresponding columns (a square submatrix);
1954    each processor's off-diagonal portion encompasses the remainder of the
1955    local matrix (a rectangular submatrix).
1956 
1957    The user can specify preallocated storage for the diagonal part of
1958    the local submatrix with either d_nz or d_nnz (not both).  Set
1959    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1960    memory allocation.  Likewise, specify preallocated storage for the
1961    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1962 
1963    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1964    the figure below we depict these three local rows and all columns (0-11).
1965 
1966 .vb
1967            0 1 2 3 4 5 6 7 8 9 10 11
1968           -------------------
1969    row 3  |  o o o d d d o o o o o o
1970    row 4  |  o o o d d d o o o o o o
1971    row 5  |  o o o d d d o o o o o o
1972           -------------------
1973 .ve
1974 
1975    Thus, any entries in the d locations are stored in the d (diagonal)
1976    submatrix, and any entries in the o locations are stored in the
1977    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1978    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1979 
1980    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1981    plus the diagonal part of the d matrix,
1982    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1983    In general, for PDE problems in which most nonzeros are near the diagonal,
1984    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1985    or you will get TERRIBLE performance; see the users' manual chapter on
1986    matrices.
1987 
1988    Level: intermediate
1989 
1990 .keywords: matrix, block, aij, compressed row, sparse, parallel
1991 
1992 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1993 @*/
1994 
1995 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
1996 {
1997   int ierr,size;
1998 
1999   PetscFunctionBegin;
2000   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2001   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2002   if (size > 1) {
2003     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2004     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2005   } else {
2006     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2007     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2008   }
2009   PetscFunctionReturn(0);
2010 }
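/*
   Editorial usage sketch: the convenience routine above combines creation,
   type selection, and preallocation in one call.  The block size 2, the
   PETSC_DECIDE local sizes, the global size M, and the nonzero estimates
   are illustrative placeholders.

     Mat A;
     ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                              3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
*/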
2011 
2012 
2013 #undef __FUNCT__
2014 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2015 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2016 {
2017   Mat          mat;
2018   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2019   int          ierr,len=0;
2020 
2021   PetscFunctionBegin;
2022   *newmat       = 0;
2023   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2024   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
2025   mat->preallocated = PETSC_TRUE;
2026   a = (Mat_MPISBAIJ*)mat->data;
2027   a->bs  = oldmat->bs;
2028   a->bs2 = oldmat->bs2;
2029   a->mbs = oldmat->mbs;
2030   a->nbs = oldmat->nbs;
2031   a->Mbs = oldmat->Mbs;
2032   a->Nbs = oldmat->Nbs;
2033 
2034   a->rstart       = oldmat->rstart;
2035   a->rend         = oldmat->rend;
2036   a->cstart       = oldmat->cstart;
2037   a->cend         = oldmat->cend;
2038   a->size         = oldmat->size;
2039   a->rank         = oldmat->rank;
2040   a->donotstash   = oldmat->donotstash;
2041   a->roworiented  = oldmat->roworiented;
2042   a->rowindices   = 0;
2043   a->rowvalues    = 0;
2044   a->getrowactive = PETSC_FALSE;
2045   a->barray       = 0;
2046   a->rstart_bs    = oldmat->rstart_bs;
2047   a->rend_bs      = oldmat->rend_bs;
2048   a->cstart_bs    = oldmat->cstart_bs;
2049   a->cend_bs      = oldmat->cend_bs;
2050 
2051   /* hash table stuff */
2052   a->ht           = 0;
2053   a->hd           = 0;
2054   a->ht_size      = 0;
2055   a->ht_flag      = oldmat->ht_flag;
2056   a->ht_fact      = oldmat->ht_fact;
2057   a->ht_total_ct  = 0;
2058   a->ht_insert_ct = 0;
2059 
2060   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
2061   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2062   a->cowners    = a->rowners + a->size + 2;
2063   a->rowners_bs = a->cowners + a->size + 2;
2064   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2065   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2066   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2067   if (oldmat->colmap) {
2068 #if defined (PETSC_USE_CTABLE)
2069     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2070 #else
2071     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2072     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2073     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2074 #endif
2075   } else a->colmap = 0;
2076   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2077     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2078     PetscLogObjectMemory(mat,len*sizeof(int));
2079     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2080   } else a->garray = 0;
2081 
2082   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2083   PetscLogObjectParent(mat,a->lvec);
2084   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2085 
2086   PetscLogObjectParent(mat,a->Mvctx);
2087   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2088   PetscLogObjectParent(mat,a->A);
2089   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2090   PetscLogObjectParent(mat,a->B);
2091   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2092   *newmat = mat;
2093   PetscFunctionReturn(0);
2094 }
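/*
   Editorial usage sketch: duplication (with or without copying the numerical
   values) goes through the generic MatDuplicate() interface used elsewhere
   in this file; A is a placeholder.

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
*/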
2095 
2096 #include "petscsys.h"
2097 
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatLoad_MPISBAIJ"
2100 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2101 {
2102   Mat          A;
2103   int          i,nz,ierr,j,rstart,rend,fd;
2104   PetscScalar  *vals,*buf;
2105   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2106   MPI_Status   status;
2107   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2108   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2109   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2110   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2111   int          dcount,kmax,k,nzcount,tmp;
2112 
2113   PetscFunctionBegin;
2114   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2115 
2116   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2117   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2118   if (!rank) {
2119     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2120     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2121     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2122     if (header[3] < 0) {
2123       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2124     }
2125   }
2126 
2127   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2128   M = header[1]; N = header[2];
2129 
2130   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2131 
2132   /*
2133      This code adds extra rows to make sure the number of rows is
2134      divisible by the blocksize
2135   */
2136   Mbs        = M/bs;
2137   extra_rows = bs - M + bs*(Mbs);
2138   if (extra_rows == bs) extra_rows = 0;
2139   else                  Mbs++;
2140   if (extra_rows && !rank) {
2141     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2142   }
2143 
2144   /* determine ownership of all rows */
2145   mbs        = Mbs/size + ((Mbs % size) > rank);
2146   m          = mbs*bs;
2147   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2148   browners   = rowners + size + 1;
2149   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2150   rowners[0] = 0;
2151   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2152   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2153   rstart = rowners[rank];
2154   rend   = rowners[rank+1];
2155 
2156   /* distribute row lengths to all processors */
2157   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2158   if (!rank) {
2159     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2160     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2161     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2162     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2163     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2164     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2165     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2166   } else {
2167     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2168   }
2169 
2170   if (!rank) {   /* procs[0] */
2171     /* calculate the number of nonzeros on each processor */
2172     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2173     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2174     for (i=0; i<size; i++) {
2175       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2176         procsnz[i] += rowlengths[j];
2177       }
2178     }
2179     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2180 
2181     /* determine max buffer needed and allocate it */
2182     maxnz = 0;
2183     for (i=0; i<size; i++) {
2184       maxnz = PetscMax(maxnz,procsnz[i]);
2185     }
2186     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2187 
2188     /* read in my part of the matrix column indices  */
2189     nz     = procsnz[0];
2190     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2191     mycols = ibuf;
2192     if (size == 1)  nz -= extra_rows;
2193     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2194     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2195 
2196     /* read in the parts for all the other processors (except the last) and send them off */
2197     for (i=1; i<size-1; i++) {
2198       nz   = procsnz[i];
2199       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2200       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2201     }
2202     /* read in the stuff for the last proc */
2203     if (size != 1) {
2204       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2205       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2206       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2207       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2208     }
2209     ierr = PetscFree(cols);CHKERRQ(ierr);
2210   } else {  /* procs[i], i>0 */
2211     /* determine buffer space needed for message */
2212     nz = 0;
2213     for (i=0; i<m; i++) {
2214       nz += locrowlens[i];
2215     }
2216     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2217     mycols = ibuf;
2218     /* receive message of column indices*/
2219     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2220     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2221     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2222   }
2223 
2224   /* loop over local rows, determining number of off diagonal entries */
2225   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2226   odlens   = dlens + (rend-rstart);
2227   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2228   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2229   masked1  = mask    + Mbs;
2230   masked2  = masked1 + Mbs;
2231   rowcount = 0; nzcount = 0;
2232   for (i=0; i<mbs; i++) {
2233     dcount  = 0;
2234     odcount = 0;
2235     for (j=0; j<bs; j++) {
2236       kmax = locrowlens[rowcount];
2237       for (k=0; k<kmax; k++) {
2238         tmp = mycols[nzcount++]/bs; /* block col. index */
2239         if (!mask[tmp]) {
2240           mask[tmp] = 1;
2241           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2242           else masked1[dcount++] = tmp; /* entry in diag portion */
2243         }
2244       }
2245       rowcount++;
2246     }
2247 
2248     dlens[i]  = dcount;  /* d_nzz[i] */
2249     odlens[i] = odcount; /* o_nzz[i] */
2250 
2251     /* zero out the mask elements we set */
2252     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2253     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2254   }
2255 
2256   /* create our matrix */
2257   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2258   ierr = MatSetType(A,type);CHKERRQ(ierr);
2259   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2260   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2261 
2262   if (!rank) {
2263     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2264     /* read in my part of the matrix numerical values  */
2265     nz = procsnz[0];
2266     vals = buf;
2267     mycols = ibuf;
2268     if (size == 1)  nz -= extra_rows;
2269     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2270     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2271 
2272     /* insert into matrix */
2273     jj      = rstart*bs;
2274     for (i=0; i<m; i++) {
2275       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2276       mycols += locrowlens[i];
2277       vals   += locrowlens[i];
2278       jj++;
2279     }
2280 
2281     /* read in other processors (except the last one) and ship out */
2282     for (i=1; i<size-1; i++) {
2283       nz   = procsnz[i];
2284       vals = buf;
2285       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2286       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2287     }
2288     /* the last proc */
2289     if (size != 1){
2290       nz   = procsnz[i] - extra_rows;
2291       vals = buf;
2292       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2293       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2294       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2295     }
2296     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2297 
2298   } else {
2299     /* receive numeric values */
2300     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2301 
2302     /* receive message of values*/
2303     vals   = buf;
2304     mycols = ibuf;
2305     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2306     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2307     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2308 
2309     /* insert into matrix */
2310     jj      = rstart*bs;
2311     for (i=0; i<m; i++) {
2312       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2313       mycols += locrowlens[i];
2314       vals   += locrowlens[i];
2315       jj++;
2316     }
2317   }
2318 
2319   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2320   ierr = PetscFree(buf);CHKERRQ(ierr);
2321   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2322   ierr = PetscFree(rowners);CHKERRQ(ierr);
2323   ierr = PetscFree(dlens);CHKERRQ(ierr);
2324   ierr = PetscFree(mask);CHKERRQ(ierr);
2325   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2326   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2327   *newmat = A;
2328   PetscFunctionReturn(0);
2329 }
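/*
   Editorial usage sketch: loading a matrix written in PETSc binary format;
   viewer is assumed to have been opened on a binary file beforehand (for
   instance with PetscViewerBinaryOpen()), and -matload_block_size selects
   the block size used above.

     Mat A;
     ierr = MatLoad(viewer,MATMPISBAIJ,&A);CHKERRQ(ierr);
*/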
2330 
2331 #undef __FUNCT__
2332 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2333 /*@
2334    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2335 
2336    Input Parameters:
2337 .  mat  - the matrix
2338 .  fact - factor
2339 
2340    Collective on Mat
2341 
2342    Level: advanced
2343 
2344   Notes:
2345    This can also be set by the command line option: -mat_use_hash_table fact
2346 
2347 .keywords: matrix, hashtable, factor, HT
2348 
2349 .seealso: MatSetOption()
2350 @*/
2351 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2352 {
2353   PetscFunctionBegin;
2354   SETERRQ(1,"Function not yet written for SBAIJ format");
2355   /* PetscFunctionReturn(0); */
2356 }
2357 
2358 #undef __FUNCT__
2359 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2360 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2361 {
2362   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2363   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2364   PetscReal    atmp;
2365   PetscReal    *work,*svalues,*rvalues;
2366   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2367   int          rank,size,*rowners_bs,dest,count,source;
2368   PetscScalar  *va;
2369   MatScalar    *ba;
2370   MPI_Status   stat;
2371 
2372   PetscFunctionBegin;
2373   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2374   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2375 
2376   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2377   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2378 
2379   bs   = a->bs;
2380   mbs  = a->mbs;
2381   Mbs  = a->Mbs;
2382   ba   = b->a;
2383   bi   = b->i;
2384   bj   = b->j;
2389 
2390   /* find ownerships */
2391   rowners_bs = a->rowners_bs;
2397 
2398   /* each proc creates an array to be distributed */
2399   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2400   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2401 
2402   /* row_max for B */
2403   if (rank != size-1){
2404     for (i=0; i<mbs; i++) {
2405       ncols = bi[1] - bi[0]; bi++;
2406       brow  = bs*i;
2407       for (j=0; j<ncols; j++){
2408         bcol = bs*(*bj);
2409         for (kcol=0; kcol<bs; kcol++){
2410           col = bcol + kcol;                 /* local col index */
2411           col += rowners_bs[rank+1];      /* global col index */
2413           for (krow=0; krow<bs; krow++){
2414             atmp = PetscAbsScalar(*ba); ba++;
2415             row = brow + krow;    /* local row index */
2417             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2418             if (work[col] < atmp) work[col] = atmp;
2419           }
2420         }
2421         bj++;
2422       }
2423     }
2429 
2430     /* send values to its owners */
2431     for (dest=rank+1; dest<size; dest++){
2432       svalues = work + rowners_bs[dest];
2433       count   = rowners_bs[dest+1]-rowners_bs[dest];
2434       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2439     }
2440   }
2441 
2442   /* receive values */
2443   if (rank){
2444     rvalues = work;
2445     count   = rowners_bs[rank+1]-rowners_bs[rank];
2446     for (source=0; source<rank; source++){
2447       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2448       /* process values */
2449       for (i=0; i<count; i++){
2450         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2451       }
2456     }
2457   }
2458 
2459   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2460   ierr = PetscFree(work);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
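/*
   Editorial usage sketch: v must be a parallel vector whose local size
   matches the local row count of A (all names are placeholders).

     Vec v;
     int m,n,M,N;
     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
     ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
     ierr = VecCreateMPI(PETSC_COMM_WORLD,m,M,&v);CHKERRQ(ierr);
     ierr = MatGetRowMax(A,v);CHKERRQ(ierr);
*/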
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatRelax_MPISBAIJ"
2466 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2467 {
2468   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2469   int            ierr,mbs=mat->mbs,bs=mat->bs;
2470   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2471   Vec            bb1;
2472 
2473   PetscFunctionBegin;
2474   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2475   if (bs > 1)
2476     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2477 
2478   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2479     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2480       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2481       its--;
2482     }
2483 
2484     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2485     while (its--){
2486 
2487       /* lower triangular part: slvec0b = B^T*xx (negated below, together with the copy of xx) */
2488       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2489 
2490       /* copy xx into slvec0a */
2491       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2492       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2493       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2494       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2495 
2496       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2497 
2498       /* copy bb into slvec1a */
2499       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2500       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2501       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2502       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2503 
2504       /* set slvec1b = 0 */
2505       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2506 
2507       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2508       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2509       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2510       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2511 
2512       /* upper triangular part: bb1 = bb1 - B*x */
2513       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2514 
2515       /* local diagonal sweep */
2516       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2517     }
2518     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2519   } else {
2520     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2521   }
2522   PetscFunctionReturn(0);
2523 }
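/*
   Editorial usage sketch: the only MatSORType path supported above is the
   local symmetric sweep.  Assuming the public MatRelax() mirrors the
   parameter list of the implementation (omega, shift, global and local
   iteration counts), a call would look like the following; A, b, and x are
   placeholders.

     ierr = MatRelax(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS,
                     0.0,1,1,x);CHKERRQ(ierr);
*/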
2524 
2525 #undef __FUNCT__
2526 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2527 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2528 {
2529   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2530   int            ierr;
2531   PetscScalar    mone=-1.0;
2532   Vec            lvec1,bb1;
2533 
2534   PetscFunctionBegin;
2535   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2536   if (mat->bs > 1)
2537     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2538 
2539   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2540     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2541       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2542       its--;
2543     }
2544 
2545     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2546     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2547     while (its--){
2548       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2549 
2550       /* lower diagonal part: bb1 = bb - B^T*xx */
2551       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2552       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2553 
2554       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2555       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2556       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2557 
2558       /* upper diagonal part: bb1 = bb1 - B*x */
2559       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2560       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2561 
2562       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2563 
2564       /* diagonal sweep */
2565       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2566     }
2567     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2568     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2569   } else {
2570     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2571   }
2572   PetscFunctionReturn(0);
2573 }
2574 
2575