xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision cd0089b03c240696c596d4fef40bc47e257771ec)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 extern int DisAssemble_MPISBAIJ(Mat);
11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
13 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
18 extern int MatPrintHelp_SeqSBAIJ(Mat);
19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the ..._MatScalar() routines do not exist separately; the
26    macros below simply alias those names to the plain ones. Otherwise ..._MatScalar() takes
27    matrix elements already in single precision and inserts them into the single precision
28    data structure, while MatSetValuesBlocked_MPISBAIJ() converts the (double precision)
29    entries into single precision and then calls ..._MatScalar() to insert them.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 int MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
51   int          ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 int MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
66   int          ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
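/*
   Overview of the two insertion macros below: MatSetValues_SeqSBAIJ_A_Private() and
   MatSetValues_SeqSBAIJ_B_Private() insert a single scalar into the local diagonal (A)
   respectively off-diagonal (B) part.  Each first narrows the search with a short bisection
   over the sorted block-column indices of the block row, then scans linearly; an existing
   bs x bs block is updated in place (added to or overwritten according to addv), otherwise
   a new block is inserted and the later entries of the row are shifted up.  If the row is
   full, the i/j/a storage is reallocated with room for CHUNKSIZE additional blocks and the
   old data is copied over.
*/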
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
103       if (nrow >= rmax) { \
104         /* there is no extra room in row, therefore enlarge */ \
105         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
106         MatScalar *new_a; \
107  \
108         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
109  \
110         /* malloc new storage space */ \
111         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
112         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
113         new_j = (int*)(new_a + bs2*new_nz); \
114         new_i = new_j + new_nz; \
115  \
116         /* copy over old data into new slots */ \
117         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
118         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
119         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
120         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
121         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
122         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
124         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
125                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
126         /* free up old matrix storage */ \
127         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
128         if (!a->singlemalloc) { \
129           ierr = PetscFree(a->i);CHKERRQ(ierr); \
130           ierr = PetscFree(a->j);CHKERRQ(ierr);\
131         } \
132         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
133         a->singlemalloc = PETSC_TRUE; \
134  \
135         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
136         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
137         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
138         a->maxnz += bs2*CHUNKSIZE; \
139         a->reallocs++; \
140         a->nz++; \
141       } \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       a_noinsert:; \
152     ailen[brow] = nrow; \
153 }
154 #ifndef MatSetValues_SeqSBAIJ_B_Private
155 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
156 { \
157     brow = row/bs;  \
158     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
159     rmax = bimax[brow]; nrow = bilen[brow]; \
160       bcol = col/bs; \
161       ridx = row % bs; cidx = col % bs; \
162       low = 0; high = nrow; \
163       while (high-low > 3) { \
164         t = (low+high)/2; \
165         if (rp[t] > bcol) high = t; \
166         else              low  = t; \
167       } \
168       for (_i=low; _i<high; _i++) { \
169         if (rp[_i] > bcol) break; \
170         if (rp[_i] == bcol) { \
171           bap  = ap +  bs2*_i + bs*cidx + ridx; \
172           if (addv == ADD_VALUES) *bap += value;  \
173           else                    *bap  = value;  \
174           goto b_noinsert; \
175         } \
176       } \
177       if (b->nonew == 1) goto b_noinsert; \
178       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
179       if (nrow >= rmax) { \
180         /* there is no extra room in row, therefore enlarge */ \
181         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
182         MatScalar *new_a; \
183  \
184         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
185  \
186         /* malloc new storage space */ \
187         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
188         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
189         new_j = (int*)(new_a + bs2*new_nz); \
190         new_i = new_j + new_nz; \
191  \
192         /* copy over old data into new slots */ \
193         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
194         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
195         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
196         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
197         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
198         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
200         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
201                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
202         /* free up old matrix storage */ \
203         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
204         if (!b->singlemalloc) { \
205           ierr = PetscFree(b->i);CHKERRQ(ierr); \
206           ierr = PetscFree(b->j);CHKERRQ(ierr); \
207         } \
208         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
209         b->singlemalloc = PETSC_TRUE; \
210  \
211         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
212         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
213         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
214         b->maxnz += bs2*CHUNKSIZE; \
215         b->reallocs++; \
216         b->nz++; \
217       } \
218       N = nrow++ - 1;  \
219       /* shift up all the later entries in this row */ \
220       for (ii=N; ii>=_i; ii--) { \
221         rp[ii+1] = rp[ii]; \
222         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
223       } \
224       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
225       rp[_i]                      = bcol;  \
226       ap[bs2*_i + bs*cidx + ridx] = value;  \
227       b_noinsert:; \
228     bilen[brow] = nrow; \
229 }
230 #endif
231 
232 #if defined(PETSC_USE_MAT_SINGLE)
233 #undef __FUNCT__
234 #define __FUNCT__ "MatSetValues_MPISBAIJ"
235 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
236 {
237   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
238   int          ierr,i,N = m*n;
239   MatScalar    *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
261   int          ierr,i,N = m*n*b->bs2;
262   MatScalar    *vsingle;
263 
264   PetscFunctionBegin;
265   if (N > b->setvalueslen) {
266     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
267     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
268     b->setvalueslen  = N;
269   }
270   vsingle = b->setvaluescopy;
271   for (i=0; i<N; i++) {
272     vsingle[i] = v[i];
273   }
274   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
280 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
281 {
282   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
283   int         ierr,i,N = m*n;
284   MatScalar   *vsingle;
285 
286   PetscFunctionBegin;
287   SETERRQ(1,"Function not yet written for SBAIJ format");
288   /* PetscFunctionReturn(0); */
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
293 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
294 {
295   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
296   int         ierr,i,N = m*n*b->bs2;
297   MatScalar   *vsingle;
298 
299   PetscFunctionBegin;
300   SETERRQ(1,"Function not yet written for SBAIJ format");
301   /* PetscFunctionReturn(0); */
302 }
303 #endif
304 
305 /* Only add/insert a(i,j) with i<=j (blocks).
306    Any a(i,j) with i>j input by the user is ignored.
307 */
308 #undef __FUNCT__
309 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
310 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
311 {
312   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
313   MatScalar    value;
314   PetscTruth   roworiented = baij->roworiented;
315   int          ierr,i,j,row,col;
316   int          rstart_orig=baij->rstart_bs;
317   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
318   int          cend_orig=baij->cend_bs,bs=baij->bs;
319 
320   /* Some Variables required in the macro */
321   Mat          A = baij->A;
322   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
323   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
324   MatScalar    *aa=a->a;
325 
326   Mat          B = baij->B;
327   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
328   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
329   MatScalar    *ba=b->a;
330 
331   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
332   int          low,high,t,ridx,cidx,bs2=a->bs2;
333   MatScalar    *ap,*bap;
334 
335   /* for stash */
336   int          n_loc, *in_loc=0;
337   MatScalar    *v_loc=0;
338 
339   PetscFunctionBegin;
340 
341   if(!baij->donotstash){
342     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
343     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
344   }
345 
346   for (i=0; i<m; i++) {
347     if (im[i] < 0) continue;
348 #if defined(PETSC_USE_BOPT_g)
349     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
350 #endif
351     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
352       row = im[i] - rstart_orig;              /* local row index */
353       for (j=0; j<n; j++) {
354         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
355         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
356           col = in[j] - cstart_orig;          /* local col index */
357           brow = row/bs; bcol = col/bs;
358           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
359           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
360           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
361           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
362         } else if (in[j] < 0) continue;
363 #if defined(PETSC_USE_BOPT_g)
364         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
365 #endif
366         else {  /* off-diag entry (B) */
367           if (mat->was_assembled) {
368             if (!baij->colmap) {
369               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
370             }
371 #if defined (PETSC_USE_CTABLE)
372             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
373             col  = col - 1;
374 #else
375             col = baij->colmap[in[j]/bs] - 1;
376 #endif
377             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
378               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
379               col =  in[j];
380               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
381               B = baij->B;
382               b = (Mat_SeqBAIJ*)(B)->data;
383               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
384               ba=b->a;
385             } else col += in[j]%bs;
386           } else col = in[j];
387           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
388           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
389           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
390         }
391       }
392     } else {  /* off processor entry */
393       if (!baij->donotstash) {
394         n_loc = 0;
395         for (j=0; j<n; j++){
396           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
397           in_loc[n_loc] = in[j];
398           if (roworiented) {
399             v_loc[n_loc] = v[i*n+j];
400           } else {
401             v_loc[n_loc] = v[j*m+i];
402           }
403           n_loc++;
404         }
405         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
406       }
407     }
408   }
409 
410   if(!baij->donotstash){
411     ierr = PetscFree(in_loc);CHKERRQ(ierr);
412     ierr = PetscFree(v_loc);CHKERRQ(ierr);
413   }
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
419 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
420 {
421   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
422   const MatScalar *value;
423   MatScalar       *barray=baij->barray;
424   PetscTruth      roworiented = baij->roworiented;
425   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
426   int             rend=baij->rend,cstart=baij->cstart,stepval;
427   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
428 
429   PetscFunctionBegin;
430   if(!barray) {
431     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
432     baij->barray = barray;
433   }
434 
435   if (roworiented) {
436     stepval = (n-1)*bs;
437   } else {
438     stepval = (m-1)*bs;
439   }
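  /* stepval is the number of scalars to skip, after copying one bs-long row of a block out of
     the user array v, to reach the next row of that same block: the rows of one block are
     separated by the bs entries of each of the other n-1 (row oriented) or m-1 (column
     oriented) blocks. */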
440   for (i=0; i<m; i++) {
441     if (im[i] < 0) continue;
442 #if defined(PETSC_USE_BOPT_g)
443     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
444 #endif
445     if (im[i] >= rstart && im[i] < rend) {
446       row = im[i] - rstart;
447       for (j=0; j<n; j++) {
448         /* If n == 1 (row oriented) or m == 1 (column oriented) then a copy is not required */
449         if ((roworiented) && (n == 1)) {
450           barray = (MatScalar*) v + i*bs2;
451         } else if((!roworiented) && (m == 1)) {
452           barray = (MatScalar*) v + j*bs2;
453         } else { /* Here a copy is required */
454           if (roworiented) {
455             value = v + i*(stepval+bs)*bs + j*bs;
456           } else {
457             value = v + j*(stepval+bs)*bs + i*bs;
458           }
459           for (ii=0; ii<bs; ii++,value+=stepval) {
460             for (jj=0; jj<bs; jj++) {
461               *barray++  = *value++;
462             }
463           }
464           barray -=bs2;
465         }
466 
467         if (in[j] >= cstart && in[j] < cend){
468           col  = in[j] - cstart;
469           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
470         }
471         else if (in[j] < 0) continue;
472 #if defined(PETSC_USE_BOPT_g)
473         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
474 #endif
475         else {
476           if (mat->was_assembled) {
477             if (!baij->colmap) {
478               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
479             }
480 
481 #if defined(PETSC_USE_BOPT_g)
482 #if defined (PETSC_USE_CTABLE)
483             { int data;
484               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
485               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
486             }
487 #else
488             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
489 #endif
490 #endif
491 #if defined (PETSC_USE_CTABLE)
492 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
493             col  = (col - 1)/bs;
494 #else
495             col = (baij->colmap[in[j]] - 1)/bs;
496 #endif
497             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
498               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
499               col =  in[j];
500             }
501           }
502           else col = in[j];
503           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
504         }
505       }
506     } else {
507       if (!baij->donotstash) {
508         if (roworiented) {
509           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
510         } else {
511           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
512         }
513       }
514     }
515   }
516   PetscFunctionReturn(0);
517 }
518 
519 #define HASH_KEY 0.6180339887
520 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
521 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
522 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
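/*
   HASH() implements multiplicative hashing: the key is multiplied by the golden ratio
   conjugate (HASH_KEY = 0.6180339887), the fractional part of the product is taken, and that
   fraction is scaled by the table size to give a slot in [0,size).
*/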
523 #undef __FUNCT__
524 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
525 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
526 {
527   PetscFunctionBegin;
528   SETERRQ(1,"Function not yet written for SBAIJ format");
529   /* PetscFunctionReturn(0); */
530 }
531 
532 #undef __FUNCT__
533 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
534 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
535 {
536   PetscFunctionBegin;
537   SETERRQ(1,"Function not yet written for SBAIJ format");
538   /* PetscFunctionReturn(0); */
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatGetValues_MPISBAIJ"
543 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
544 {
545   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
546   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
547   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
552     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
553     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
554       row = idxm[i] - bsrstart;
555       for (j=0; j<n; j++) {
556         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
557         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
558         if (idxn[j] >= bscstart && idxn[j] < bscend){
559           col = idxn[j] - bscstart;
560           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
561         } else {
562           if (!baij->colmap) {
563             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
564           }
565 #if defined (PETSC_USE_CTABLE)
566           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
567           data --;
568 #else
569           data = baij->colmap[idxn[j]/bs]-1;
570 #endif
571           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
572           else {
573             col  = data + idxn[j]%bs;
574             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
575           }
576         }
577       }
578     } else {
579       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
580     }
581   }
582  PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatNorm_MPISBAIJ"
587 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
588 {
589   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
590   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
591   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
592   int        ierr;
593   PetscReal  sum[2],*lnorm2;
594 
595   PetscFunctionBegin;
596   if (baij->size == 1) {
597     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
598   } else {
599     if (type == NORM_FROBENIUS) {
600       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
601       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
602       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
603       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
605       /*
606       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
607       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
608       */
609       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
610       /*
611       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
612       PetscSynchronizedFlush(mat->comm); */
613 
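      /* sum[0] and sum[1] are the squared Frobenius norms of the local A and B parts summed
         over all processes; the B part is counted twice because only one triangle of each
         off-diagonal coupling is stored in the symmetric format. */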
614       *norm = sqrt(sum[0] + 2*sum[1]);
615       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
616     } else {
617       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
618     }
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 /*
624   Creates the hash table and fills it in.
625   This table is created only once.
626   If new entries need to be added to the matrix
627   then the hash table has to be destroyed and
628   recreated.
629 */
630 #undef __FUNCT__
631 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
632 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
633 {
634   PetscFunctionBegin;
635   SETERRQ(1,"Function not yet written for SBAIJ format");
636   /* PetscFunctionReturn(0); */
637 }
638 
639 #undef __FUNCT__
640 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
641 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
642 {
643   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
644   int         ierr,nstash,reallocs;
645   InsertMode  addv;
646 
647   PetscFunctionBegin;
648   if (baij->donotstash) {
649     PetscFunctionReturn(0);
650   }
651 
652   /* make sure all processors are either in INSERTMODE or ADDMODE */
653   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
654   if (addv == (ADD_VALUES|INSERT_VALUES)) {
655     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
656   }
657   mat->insertmode = addv; /* in case this processor had no cache */
658 
659   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
660   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
663   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
664   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNCT__
669 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
670 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
673   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
674   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
675   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
676   int         *row,*col,other_disassembled;
677   PetscTruth  r1,r2,r3;
678   MatScalar   *val;
679   InsertMode  addv = mat->insertmode;
680 
681   PetscFunctionBegin;
682 
683   if (!baij->donotstash) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       /*
687       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
688       PetscSynchronizedFlush(mat->comm);
689       */
690       if (!flg) break;
691 
692       for (i=0; i<n;) {
693         /* Now identify the consecutive vals belonging to the same row */
694         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
695         if (j < n) ncols = j-i;
696         else       ncols = n-i;
697         /* Now assemble all these values with a single function call */
698         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
699         i = j;
700       }
701     }
702     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
703     /* Now process the block-stash. Since the values are stashed column-oriented,
704        set the roworiented flag to column oriented, and after MatSetValues()
705        restore the original flags */
706     r1 = baij->roworiented;
707     r2 = a->roworiented;
708     r3 = b->roworiented;
709     baij->roworiented = PETSC_FALSE;
710     a->roworiented    = PETSC_FALSE;
711     b->roworiented    = PETSC_FALSE;
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n;) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
719         if (j < n) ncols = j-i;
720         else       ncols = n-i;
721         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
722         i = j;
723       }
724     }
725     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
726     baij->roworiented = r1;
727     a->roworiented    = r2;
728     b->roworiented    = r3;
729   }
730 
731   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled; if so, we must
735      also disassemble ourselves so that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
741     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746 
747   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
748     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
749   }
750   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
752 
753 #if defined(PETSC_USE_BOPT_g)
754   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
755     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
756     baij->ht_total_ct  = 0;
757     baij->ht_insert_ct = 0;
758   }
759 #endif
760   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
761     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
762     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
763     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
764   }
765 
766   if (baij->rowvalues) {
767     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
768     baij->rowvalues = 0;
769   }
770 
771   PetscFunctionReturn(0);
772 }
773 
774 #undef __FUNCT__
775 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
776 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
777 {
778   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
779   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
780   PetscTruth        isascii,isdraw;
781   PetscViewer       sviewer;
782   PetscViewerFormat format;
783 
784   PetscFunctionBegin;
785   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
786   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
787   if (isascii) {
788     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
789     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
790       MatInfo info;
791       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
792       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
793       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
794               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
795               baij->bs,(int)info.memory);CHKERRQ(ierr);
796       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
797       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
798       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
799       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
800       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
801       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
802       PetscFunctionReturn(0);
803     } else if (format == PETSC_VIEWER_ASCII_INFO) {
804       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
805       PetscFunctionReturn(0);
806     }
807   }
808 
809   if (isdraw) {
810     PetscDraw       draw;
811     PetscTruth isnull;
812     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
813     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
814   }
815 
816   if (size == 1) {
817     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
818     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
819   } else {
820     /* assemble the entire matrix onto first processor. */
821     Mat         A;
822     Mat_SeqSBAIJ *Aloc;
823     Mat_SeqBAIJ *Bloc;
824     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
825     MatScalar   *a;
826 
827     if (!rank) {
828       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
829     } else {
830       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
831     }
832     PetscLogObjectParent(mat,A);
833 
834     /* copy over the A part */
835     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
836     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
837     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
838 
839     for (i=0; i<mbs; i++) {
840       rvals[0] = bs*(baij->rstart + i);
841       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
842       for (j=ai[i]; j<ai[i+1]; j++) {
843         col = (baij->cstart+aj[j])*bs;
844         for (k=0; k<bs; k++) {
845           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
846           col++; a += bs;
847         }
848       }
849     }
850     /* copy over the B part */
851     Bloc = (Mat_SeqBAIJ*)baij->B->data;
852     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
853     for (i=0; i<mbs; i++) {
854       rvals[0] = bs*(baij->rstart + i);
855       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
856       for (j=ai[i]; j<ai[i+1]; j++) {
857         col = baij->garray[aj[j]]*bs;
858         for (k=0; k<bs; k++) {
859           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
860           col++; a += bs;
861         }
862       }
863     }
864     ierr = PetscFree(rvals);CHKERRQ(ierr);
865     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
866     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
867     /*
868        Every process has to participate in drawing the matrix since the graphics waits are
869        synchronized across all processors that share the PetscDraw object
870     */
871     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
872     if (!rank) {
873       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
874       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
875     }
876     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
877     ierr = MatDestroy(A);CHKERRQ(ierr);
878   }
879   PetscFunctionReturn(0);
880 }
881 
882 #undef __FUNCT__
883 #define __FUNCT__ "MatView_MPISBAIJ"
884 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
885 {
886   int        ierr;
887   PetscTruth isascii,isdraw,issocket,isbinary;
888 
889   PetscFunctionBegin;
890   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
891   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
892   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
893   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
894   if (isascii || isdraw || issocket || isbinary) {
895     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
896   } else {
897     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
898   }
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatDestroy_MPISBAIJ"
904 int MatDestroy_MPISBAIJ(Mat mat)
905 {
906   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
907   int         ierr;
908 
909   PetscFunctionBegin;
910 #if defined(PETSC_USE_LOG)
911   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
912 #endif
913   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
914   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
915   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
916   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
917   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
918 #if defined (PETSC_USE_CTABLE)
919   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
920 #else
921   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
922 #endif
923   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
924   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
925   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
926   if (baij->slvec0) {
927     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
928     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
929   }
930   if (baij->slvec1) {
931     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
932     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
933     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
934   }
935   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
936   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
937   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
938   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
939 #if defined(PETSC_USE_MAT_SINGLE)
940   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
941 #endif
942   ierr = PetscFree(baij);CHKERRQ(ierr);
943   PetscFunctionReturn(0);
944 }
945 
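/*
   MatMult_MPISBAIJ below forms y = A*x with a single scatter per product: slvec1a <- A_loc*x
   and slvec1b <- 0 (diagonal block), slvec0b <- B_loc^T*x (contribution of the unstored lower
   triangle), and the leading bs*mbs entries of slvec0 receive a copy of x.  The combined
   forward scatter with ADD_VALUES over sMvctx (set up in MatSetUpMultiply_MPISBAIJ) then adds
   the B^T contributions into the owners' slvec1a and gathers the needed ghost values of x
   into slvec1b, so the final y = B_loc*slvec1b + slvec1a supplies the superdiagonal part.
*/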
946 #undef __FUNCT__
947 #define __FUNCT__ "MatMult_MPISBAIJ"
948 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
949 {
950   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
951   int         ierr,nt,mbs=a->mbs,bs=a->bs;
952   PetscScalar *x,*from,zero=0.0;
953 
954   PetscFunctionBegin;
955   /*
956   PetscSynchronizedPrintf(A->comm," _1comm is called ...\n");
957   PetscSynchronizedFlush(A->comm);
958   */
959   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
960   if (nt != A->n) {
961     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
962   }
963   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
964   if (nt != A->m) {
965     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
966   }
967 
968   /* diagonal part */
969   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
970   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
971 
972   /* subdiagonal part */
973   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
974 
975   /* copy x into the vec slvec0 */
976   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
977   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
978   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
979   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
980 
981   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
982   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
983   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
984 
985   /* superdiagonal part */
986   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
987 
988   PetscFunctionReturn(0);
989 }
990 
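/*
   The _2comm variant below uses the ordinary scatter context Mvctx twice instead of the
   combined slvec scatter: a forward scatter brings in the ghost values of x for the
   superdiagonal product B_loc*lvec, and a reverse scatter with ADD_VALUES returns the
   subdiagonal contribution B_loc^T*x to the owning processes.
*/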
991 #undef __FUNCT__
992 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
993 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
994 {
995   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
996   int         ierr,nt;
997 
998   PetscFunctionBegin;
999   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1000   if (nt != A->n) {
1001     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1002   }
1003   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1004   if (nt != A->m) {
1005     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1006   }
1007 
1008   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1009   /* do diagonal part */
1010   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1011   /* do superdiagonal part */
1012   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1013   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1014   /* do subdiagonal part */
1015   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1016   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1017   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1018 
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1024 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1025 {
1026   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1027   int          ierr,mbs=a->mbs,bs=a->bs;
1028   PetscScalar  *x,*from,zero=0.0;
1029 
1030   PetscFunctionBegin;
1031   /*
1032   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1033   PetscSynchronizedFlush(A->comm);
1034   */
1035   /* diagonal part */
1036   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1037   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1038 
1039   /* subdiagonal part */
1040   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1041 
1042   /* copy x into the vec slvec0 */
1043   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1044   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1045   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1046   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1047 
1048   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1049   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1050   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1051 
1052   /* superdiagonal part */
1053   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1054 
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1060 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1061 {
1062   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1063   int        ierr;
1064 
1065   PetscFunctionBegin;
1066   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1067   /* do diagonal part */
1068   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1069   /* do superdiagonal part */
1070   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1071   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1072 
1073   /* do subdiagonal part */
1074   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1075   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1076   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1077 
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1083 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1084 {
1085   PetscFunctionBegin;
1086   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
1087   /* PetscFunctionReturn(0); */
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1092 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1093 {
1094   PetscFunctionBegin;
1095   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
1096   /* PetscFunctionReturn(0); */
1097 }
1098 
1099 /*
1100   This only works correctly for square matrices where the subblock A->A is the
1101    diagonal block
1102 */
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1105 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1106 {
1107   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1108   int         ierr;
1109 
1110   PetscFunctionBegin;
1111   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1112   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatScale_MPISBAIJ"
1118 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1119 {
1120   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1121   int         ierr;
1122 
1123   PetscFunctionBegin;
1124   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1125   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1131 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1132 {
1133   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1134   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1135   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1136   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1137   int            *cmap,*idx_p,cstart = mat->cstart;
1138 
1139   PetscFunctionBegin;
1140   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1141   mat->getrowactive = PETSC_TRUE;
1142 
1143   if (!mat->rowvalues && (idx || v)) {
1144     /*
1145         allocate enough space to hold information from the longest row.
1146     */
1147     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1148     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1149     int     max = 1,mbs = mat->mbs,tmp;
1150     for (i=0; i<mbs; i++) {
1151       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1152       if (max < tmp) { max = tmp; }
1153     }
1154     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1155     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1156   }
1157 
1158   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1159   lrow = row - brstart;  /* local row index */
1160 
1161   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1162   if (!v)   {pvA = 0; pvB = 0;}
1163   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1164   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1165   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1166   nztot = nzA + nzB;
1167 
1168   cmap  = mat->garray;
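  /* The merged row below is produced in increasing global column order: first the B entries
     whose global block column (cmap[] = garray[], mapping B's compressed block columns back to
     global block indices) lies before cstart, then all entries of the diagonal block A (local
     columns, offset by cstart*bs), then the remaining B entries. */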
1169   if (v  || idx) {
1170     if (nztot) {
1171       /* Sort by increasing column numbers, assuming A and B already sorted */
1172       int imark = -1;
1173       if (v) {
1174         *v = v_p = mat->rowvalues;
1175         for (i=0; i<nzB; i++) {
1176           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1177           else break;
1178         }
1179         imark = i;
1180         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1181         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1182       }
1183       if (idx) {
1184         *idx = idx_p = mat->rowindices;
1185         if (imark > -1) {
1186           for (i=0; i<imark; i++) {
1187             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1188           }
1189         } else {
1190           for (i=0; i<nzB; i++) {
1191             if (cmap[cworkB[i]/bs] < cstart)
1192               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1193             else break;
1194           }
1195           imark = i;
1196         }
1197         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1198         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1199       }
1200     } else {
1201       if (idx) *idx = 0;
1202       if (v)   *v   = 0;
1203     }
1204   }
1205   *nz = nztot;
1206   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1207   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1213 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1214 {
1215   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1216 
1217   PetscFunctionBegin;
1218   if (baij->getrowactive == PETSC_FALSE) {
1219     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1220   }
1221   baij->getrowactive = PETSC_FALSE;
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 #undef __FUNCT__
1226 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1227 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1228 {
1229   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1230 
1231   PetscFunctionBegin;
1232   *bs = baij->bs;
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1238 int MatZeroEntries_MPISBAIJ(Mat A)
1239 {
1240   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1241   int         ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1245   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1251 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1252 {
1253   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1254   Mat         A = a->A,B = a->B;
1255   int         ierr;
1256   PetscReal   isend[5],irecv[5];
1257 
1258   PetscFunctionBegin;
1259   info->block_size     = (PetscReal)a->bs;
1260   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1261   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1262   isend[3] = info->memory;  isend[4] = info->mallocs;
1263   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1264   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1265   isend[3] += info->memory;  isend[4] += info->mallocs;
1266   if (flag == MAT_LOCAL) {
1267     info->nz_used      = isend[0];
1268     info->nz_allocated = isend[1];
1269     info->nz_unneeded  = isend[2];
1270     info->memory       = isend[3];
1271     info->mallocs      = isend[4];
1272   } else if (flag == MAT_GLOBAL_MAX) {
1273     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1274     info->nz_used      = irecv[0];
1275     info->nz_allocated = irecv[1];
1276     info->nz_unneeded  = irecv[2];
1277     info->memory       = irecv[3];
1278     info->mallocs      = irecv[4];
1279   } else if (flag == MAT_GLOBAL_SUM) {
1280     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1281     info->nz_used      = irecv[0];
1282     info->nz_allocated = irecv[1];
1283     info->nz_unneeded  = irecv[2];
1284     info->memory       = irecv[3];
1285     info->mallocs      = irecv[4];
1286   } else {
1287     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1288   }
1289   info->rows_global       = (PetscReal)A->M;
1290   info->columns_global    = (PetscReal)A->N;
1291   info->rows_local        = (PetscReal)A->m;
1292   info->columns_local     = (PetscReal)A->N;
1293   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1294   info->fill_ratio_needed = 0;
1295   info->factor_mallocs    = 0;
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1301 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1302 {
1303   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1304   int         ierr;
1305 
1306   PetscFunctionBegin;
1307   switch (op) {
1308   case MAT_NO_NEW_NONZERO_LOCATIONS:
1309   case MAT_YES_NEW_NONZERO_LOCATIONS:
1310   case MAT_COLUMNS_UNSORTED:
1311   case MAT_COLUMNS_SORTED:
1312   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1313   case MAT_KEEP_ZEROED_ROWS:
1314   case MAT_NEW_NONZERO_LOCATION_ERR:
1315     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1316     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1317     break;
1318   case MAT_ROW_ORIENTED:
1319     a->roworiented = PETSC_TRUE;
1320     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1321     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1322     break;
1323   case MAT_ROWS_SORTED:
1324   case MAT_ROWS_UNSORTED:
1325   case MAT_YES_NEW_DIAGONALS:
1326     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1327     break;
1328   case MAT_COLUMN_ORIENTED:
1329     a->roworiented = PETSC_FALSE;
1330     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1331     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1332     break;
1333   case MAT_IGNORE_OFF_PROC_ENTRIES:
1334     a->donotstash = PETSC_TRUE;
1335     break;
1336   case MAT_NO_NEW_DIAGONALS:
1337     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1338   case MAT_USE_HASH_TABLE:
1339     a->ht_flag = PETSC_TRUE;
1340     break;
1341   case MAT_NOT_SYMMETRIC:
1342   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1343   case MAT_HERMITIAN:
1344     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1345   case MAT_SYMMETRIC:
1346   case MAT_STRUCTURALLY_SYMMETRIC:
1347   case MAT_NOT_HERMITIAN:
1348   case MAT_SYMMETRY_ETERNAL:
1349   case MAT_NOT_SYMMETRY_ETERNAL:
1350     break;
1351   default:
1352     SETERRQ(PETSC_ERR_SUP,"unknown option");
1353   }
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1359 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1360 {
1361   int ierr;
1362   PetscFunctionBegin;
1363   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
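/*
   MatDiagonalScale_MPISBAIJ below requires identical left and right scaling vectors,
   presumably because an unsymmetric scaling would destroy the symmetry the SBAIJ format
   relies on.  The diagonal block is scaled on both sides, while the off-diagonal block B is
   scaled by ll on the left and by the scattered ghost values of rr on the right.
*/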
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1369 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1370 {
1371   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1372   Mat         a = baij->A,b = baij->B;
1373   int         ierr,s1,s2,s3;
1374 
1375   PetscFunctionBegin;
1376   if (ll != rr) {
1377     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1378   }
1379   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1380   if (rr) {
1381     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1382     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1383     /* Overlap communication with computation. */
1384     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1386     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1387     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1388     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1390     /* scale the diagonal block */
1391     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1392 
1394     /* Do a scatter end and then right scale the off-diagonal block */
1395     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1396     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1397   }
1398 
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1404 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1405 {
1406   PetscFunctionBegin;
1407   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1412 int MatPrintHelp_MPISBAIJ(Mat A)
1413 {
1414   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1415   MPI_Comm    comm = A->comm;
1416   static int  called = 0;
1417   int         ierr;
1418 
1419   PetscFunctionBegin;
1420   if (!a->rank) {
1421     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1422   }
1423   if (called) {PetscFunctionReturn(0);} else called = 1;
1424   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1425   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1431 int MatSetUnfactored_MPISBAIJ(Mat A)
1432 {
1433   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1434   int         ierr;
1435 
1436   PetscFunctionBegin;
1437   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1442 
1443 #undef __FUNCT__
1444 #define __FUNCT__ "MatEqual_MPISBAIJ"
1445 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1446 {
1447   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1448   Mat         a,b,c,d;
1449   PetscTruth  flg;
1450   int         ierr;
1451 
1452   PetscFunctionBegin;
1453   a = matA->A; b = matA->B;
1454   c = matB->A; d = matB->B;
1455 
1456   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1457   if (flg == PETSC_TRUE) {
1458     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1459   }
1460   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1466 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1467 {
1468   int        ierr;
1469 
1470   PetscFunctionBegin;
1471   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1472   PetscFunctionReturn(0);
1473 }
1474 /* -------------------------------------------------------------------*/
1475 static struct _MatOps MatOps_Values = {
1476        MatSetValues_MPISBAIJ,
1477        MatGetRow_MPISBAIJ,
1478        MatRestoreRow_MPISBAIJ,
1479        MatMult_MPISBAIJ,
1480 /* 4*/ MatMultAdd_MPISBAIJ,
1481        MatMultTranspose_MPISBAIJ,
1482        MatMultTransposeAdd_MPISBAIJ,
1483        0,
1484        0,
1485        0,
1486 /*10*/ 0,
1487        0,
1488        0,
1489        MatRelax_MPISBAIJ,
1490        MatTranspose_MPISBAIJ,
1491 /*15*/ MatGetInfo_MPISBAIJ,
1492        MatEqual_MPISBAIJ,
1493        MatGetDiagonal_MPISBAIJ,
1494        MatDiagonalScale_MPISBAIJ,
1495        MatNorm_MPISBAIJ,
1496 /*20*/ MatAssemblyBegin_MPISBAIJ,
1497        MatAssemblyEnd_MPISBAIJ,
1498        0,
1499        MatSetOption_MPISBAIJ,
1500        MatZeroEntries_MPISBAIJ,
1501 /*25*/ MatZeroRows_MPISBAIJ,
1502        0,
1503        0,
1504        0,
1505        0,
1506 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1507        0,
1508        0,
1509        0,
1510        0,
1511 /*35*/ MatDuplicate_MPISBAIJ,
1512        0,
1513        0,
1514        0,
1515        0,
1516 /*40*/ 0,
1517        0,
1518        0,
1519        MatGetValues_MPISBAIJ,
1520        0,
1521 /*45*/ MatPrintHelp_MPISBAIJ,
1522        MatScale_MPISBAIJ,
1523        0,
1524        0,
1525        0,
1526 /*50*/ MatGetBlockSize_MPISBAIJ,
1527        0,
1528        0,
1529        0,
1530        0,
1531 /*55*/ 0,
1532        0,
1533        MatSetUnfactored_MPISBAIJ,
1534        0,
1535        MatSetValuesBlocked_MPISBAIJ,
1536 /*60*/ 0,
1537        0,
1538        0,
1539        MatGetPetscMaps_Petsc,
1540        0,
1541 /*65*/ 0,
1542        0,
1543        0,
1544        0,
1545        0,
1546 /*70*/ MatGetRowMax_MPISBAIJ,
1547        0,
1548        0,
1549        0,
1550        0,
1551 /*75*/ 0,
1552        0,
1553        0,
1554        0,
1555        0,
1556 /*80*/ 0,
1557        0,
1558        0,
1559        0,
1560 /*85*/ MatLoad_MPISBAIJ
1561 };
1562 
1563 
1564 EXTERN_C_BEGIN
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1567 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1568 {
1569   PetscFunctionBegin;
1570   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1571   *iscopy = PETSC_FALSE;
1572   PetscFunctionReturn(0);
1573 }
1574 EXTERN_C_END
1575 
1576 EXTERN_C_BEGIN
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1579 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1580 {
1581   Mat_MPISBAIJ *b;
1582   int          ierr,i,mbs,Mbs;
1583 
1584   PetscFunctionBegin;
1585   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1586 
1587   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1588   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1589   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1590   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1591   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1592   if (d_nnz) {
1593     for (i=0; i<B->m/bs; i++) {
1594       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1595     }
1596   }
1597   if (o_nnz) {
1598     for (i=0; i<B->m/bs; i++) {
1599       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1600     }
1601   }
1602   B->preallocated = PETSC_TRUE;
1603   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1604   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1605   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1606   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1607 
1608   b   = (Mat_MPISBAIJ*)B->data;
1609   mbs = B->m/bs;
1610   Mbs = B->M/bs;
1611   if (mbs*bs != B->m) {
1612     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by blocksize %d",B->m,bs);
1613   }
1614 
1615   b->bs  = bs;
1616   b->bs2 = bs*bs;
1617   b->mbs = mbs;
1618   b->nbs = mbs;
1619   b->Mbs = Mbs;
1620   b->Nbs = Mbs;
1621 
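  /* Gather every process's block-row count, then turn rowners[] into an ownership
     prefix sum: rowners[i] is the global index of the first block row owned by
     process i, and rowners[b->size] is the total number of block rows. */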
1622   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1623   b->rowners[0]    = 0;
1624   for (i=2; i<=b->size; i++) {
1625     b->rowners[i] += b->rowners[i-1];
1626   }
1627   b->rstart    = b->rowners[b->rank];
1628   b->rend      = b->rowners[b->rank+1];
1629   b->cstart    = b->rstart;
1630   b->cend      = b->rend;
1631   for (i=0; i<=b->size; i++) {
1632     b->rowners_bs[i] = b->rowners[i]*bs;
1633   }
1634   b->rstart_bs = b-> rstart*bs;
1635   b->rend_bs   = b->rend*bs;
1636 
1637   b->cstart_bs = b->cstart*bs;
1638   b->cend_bs   = b->cend*bs;
1639 
1640 
1641   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1642   PetscLogObjectParent(B,b->A);
1643   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1644   PetscLogObjectParent(B,b->B);
1645 
1646   /* build cache for off array entries formed */
1647   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1648 
1649   PetscFunctionReturn(0);
1650 }
1651 EXTERN_C_END
1652 
1653 /*MC
1654    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1655    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1656 
1657    Options Database Keys:
1658 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1659 
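   Example (a minimal sketch; comm, m, n, M, N, bs and the preallocation estimates
   d_nz/o_nz are placeholders, and error checking is omitted):
.vb
      Mat A;
      MatCreate(comm,m,n,M,N,&A);
      MatSetType(A,MATMPISBAIJ);
      MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
      /* insert entries (only the upper triangular part is stored), then assemble */
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
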
1660   Level: beginner
1661 
1662 .seealso: MatCreateMPISBAIJ
1663 M*/
1664 
1665 EXTERN_C_BEGIN
1666 #undef __FUNCT__
1667 #define __FUNCT__ "MatCreate_MPISBAIJ"
1668 int MatCreate_MPISBAIJ(Mat B)
1669 {
1670   Mat_MPISBAIJ *b;
1671   int          ierr;
1672   PetscTruth   flg;
1673 
1674   PetscFunctionBegin;
1675 
1676   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1677   B->data = (void*)b;
1678   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1679   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1680 
1681   B->ops->destroy    = MatDestroy_MPISBAIJ;
1682   B->ops->view       = MatView_MPISBAIJ;
1683   B->mapping    = 0;
1684   B->factor     = 0;
1685   B->assembled  = PETSC_FALSE;
1686 
1687   B->insertmode = NOT_SET_VALUES;
1688   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1689   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1690 
1691   /* build local table of row and column ownerships */
1692   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1693   b->cowners    = b->rowners + b->size + 2;
1694   b->rowners_bs = b->cowners + b->size + 2;
1695   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1696 
1697   /* build cache for off array entries formed */
1698   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1699   b->donotstash  = PETSC_FALSE;
1700   b->colmap      = PETSC_NULL;
1701   b->garray      = PETSC_NULL;
1702   b->roworiented = PETSC_TRUE;
1703 
1704 #if defined(PETSC_USE_MAT_SINGLE)
1705   /* stuff for MatSetValues_XXX in single precision */
1706   b->setvalueslen     = 0;
1707   b->setvaluescopy    = PETSC_NULL;
1708 #endif
1709 
1710   /* stuff used in block assembly */
1711   b->barray       = 0;
1712 
1713   /* stuff used for matrix vector multiply */
1714   b->lvec         = 0;
1715   b->Mvctx        = 0;
1716   b->slvec0       = 0;
1717   b->slvec0b      = 0;
1718   b->slvec1       = 0;
1719   b->slvec1a      = 0;
1720   b->slvec1b      = 0;
1721   b->sMvctx       = 0;
1722 
1723   /* stuff for MatGetRow() */
1724   b->rowindices   = 0;
1725   b->rowvalues    = 0;
1726   b->getrowactive = PETSC_FALSE;
1727 
1728   /* hash table stuff */
1729   b->ht           = 0;
1730   b->hd           = 0;
1731   b->ht_size      = 0;
1732   b->ht_flag      = PETSC_FALSE;
1733   b->ht_fact      = 0;
1734   b->ht_total_ct  = 0;
1735   b->ht_insert_ct = 0;
1736 
1737   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1738   if (flg) {
1739     PetscReal fact = 1.39;
1740     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1741     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1742     if (fact <= 1.0) fact = 1.39;
1743     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1744     PetscLogInfo(0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact);
1745   }
1746   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1747                                      "MatStoreValues_MPISBAIJ",
1748                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1749   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1750                                      "MatRetrieveValues_MPISBAIJ",
1751                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1752   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1753                                      "MatGetDiagonalBlock_MPISBAIJ",
1754                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1755   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1756                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1757                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1758   PetscFunctionReturn(0);
1759 }
1760 EXTERN_C_END
1761 
1762 /*MC
1763    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1764 
1765    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1766    and MATMPISBAIJ otherwise.
1767 
1768    Options Database Keys:
1769 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1770 
1771   Level: beginner
1772 
1773 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1774 M*/
1775 
1776 EXTERN_C_BEGIN
1777 #undef __FUNCT__
1778 #define __FUNCT__ "MatCreate_SBAIJ"
1779 int MatCreate_SBAIJ(Mat A) {
1780   int ierr,size;
1781 
1782   PetscFunctionBegin;
1783   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1784   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1785   if (size == 1) {
1786     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1787   } else {
1788     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1789   }
1790   PetscFunctionReturn(0);
1791 }
1792 EXTERN_C_END
1793 
1794 #undef __FUNCT__
1795 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1796 /*@C
1797    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1798    the user should preallocate the matrix storage by setting the parameters
1799    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1800    performance can be increased by more than a factor of 50.
1801 
1802    Collective on Mat
1803 
1804    Input Parameters:
1805 +  A - the matrix
1806 .  bs   - size of block
1807 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1808            submatrix  (same for all local rows)
1809 .  d_nnz - array containing the number of block nonzeros in the various block rows
1810            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1811            (possibly different for each block row) or PETSC_NULL.  You must leave room
1812            for the diagonal entry even if it is zero.
1813 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1814            submatrix (same for all local rows).
1815 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1816            off-diagonal portion of the local submatrix (possibly different for
1817            each block row) or PETSC_NULL.
1818 
1819 
1820    Options Database Keys:
1821 .   -mat_no_unroll - uses code that does not unroll the loops in the
1822                      block calculations (much slower)
1823 .   -mat_block_size - size of the blocks to use
1824 
1825    Notes:
1826 
1827    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1828    then it must be used on all processors that share the object for that argument.
1829 
1830    Storage Information:
1831    For a square global matrix we define each processor's diagonal portion
1832    to be its local rows and the corresponding columns (a square submatrix);
1833    each processor's off-diagonal portion encompasses the remainder of the
1834    local matrix (a rectangular submatrix).
1835 
1836    The user can specify preallocated storage for the diagonal part of
1837    the local submatrix with either d_nz or d_nnz (not both).  Set
1838    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1839    memory allocation.  Likewise, specify preallocated storage for the
1840    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1841 
1842    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1843    the figure below we depict these three local rows and all columns (0-11).
1844 
1845 .vb
1846            0 1 2 3 4 5 6 7 8 9 10 11
1847           -------------------
1848    row 3  |  o o o d d d o o o o o o
1849    row 4  |  o o o d d d o o o o o o
1850    row 5  |  o o o d d d o o o o o o
1851           -------------------
1852 .ve
1853 
1854    Thus, any entries in the d locations are stored in the d (diagonal)
1855    submatrix, and any entries in the o locations are stored in the
1856    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1857    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1858 
1859    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1860    plus the diagonal part of the d matrix,
1861    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1862    In general, for PDE problems in which most nonzeros are near the diagonal,
1863    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1864    or you will get TERRIBLE performance; see the users' manual chapter on
1865    matrices.
1866 
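   Example (a sketch; mbs stands for the number of local block rows, and the entries
   of d_nnz[]/o_nnz[] are placeholders to be filled with per-block-row estimates,
   counting only the diagonal and upper triangular part for d_nnz; error checking omitted):
.vb
      int *d_nnz,*o_nnz;
      PetscMalloc(mbs*sizeof(int),&d_nnz);
      PetscMalloc(mbs*sizeof(int),&o_nnz);
      /* ... fill d_nnz[i] and o_nnz[i] for each local block row i ... */
      MatCreate(comm,m,n,M,N,&A);
      MatSetType(A,MATMPISBAIJ);
      MatMPISBAIJSetPreallocation(A,bs,PETSC_DEFAULT,d_nnz,PETSC_DEFAULT,o_nnz);
      PetscFree(d_nnz);
      PetscFree(o_nnz);
.ve
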
1867    Level: intermediate
1868 
1869 .keywords: matrix, block, aij, compressed row, sparse, parallel
1870 
1871 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1872 @*/
1873 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1874 {
1875   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1876 
1877   PetscFunctionBegin;
1878   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1879   if (f) {
1880     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1881   }
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatCreateMPISBAIJ"
1887 /*@C
1888    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1889    (block compressed row).  For good matrix assembly performance
1890    the user should preallocate the matrix storage by setting the parameters
1891    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1892    performance can be increased by more than a factor of 50.
1893 
1894    Collective on MPI_Comm
1895 
1896    Input Parameters:
1897 +  comm - MPI communicator
1898 .  bs   - size of block
1899 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1900            This value should be the same as the local size used in creating the
1901            y vector for the matrix-vector product y = Ax.
1902 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1903            This value should be the same as the local size used in creating the
1904            x vector for the matrix-vector product y = Ax.
1905 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1906 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1907 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1908            submatrix  (same for all local rows)
1909 .  d_nnz - array containing the number of block nonzeros in the various block rows
1910            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1911            (possibly different for each block row) or PETSC_NULL.
1912            You must leave room for the diagonal entry even if it is zero.
1913 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1914            submatrix (same for all local rows).
1915 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1916            off-diagonal portion of the local submatrix (possibly different for
1917            each block row) or PETSC_NULL.
1918 
1919    Output Parameter:
1920 .  A - the matrix
1921 
1922    Options Database Keys:
1923 .   -mat_no_unroll - uses code that does not unroll the loops in the
1924                      block calculations (much slower)
1925 .   -mat_block_size - size of the blocks to use
1926 .   -mat_mpi - use the parallel matrix data structures even on one processor
1927                (defaults to using SeqSBAIJ format on one processor)
1928 
1929    Notes:
1930    The user MUST specify either the local or global matrix dimensions
1931    (possibly both).
1932 
1933    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1934    then it must be used on all processors that share the object for that argument.
1935 
1936    Storage Information:
1937    For a square global matrix we define each processor's diagonal portion
1938    to be its local rows and the corresponding columns (a square submatrix);
1939    each processor's off-diagonal portion encompasses the remainder of the
1940    local matrix (a rectangular submatrix).
1941 
1942    The user can specify preallocated storage for the diagonal part of
1943    the local submatrix with either d_nz or d_nnz (not both).  Set
1944    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1945    memory allocation.  Likewise, specify preallocated storage for the
1946    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1947 
1948    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1949    the figure below we depict these three local rows and all columns (0-11).
1950 
1951 .vb
1952            0 1 2 3 4 5 6 7 8 9 10 11
1953           -------------------
1954    row 3  |  o o o d d d o o o o o o
1955    row 4  |  o o o d d d o o o o o o
1956    row 5  |  o o o d d d o o o o o o
1957           -------------------
1958 .ve
1959 
1960    Thus, any entries in the d locations are stored in the d (diagonal)
1961    submatrix, and any entries in the o locations are stored in the
1962    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1963    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1964 
1965    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1966    plus the diagonal part of the d matrix,
1967    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1968    In general, for PDE problems in which most nonzeros are near the diagonal,
1969    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1970    or you will get TERRIBLE performance; see the users' manual chapter on
1971    matrices.
1972 
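   Example (a sketch with placeholder sizes; the same estimates d_nz and o_nz are
   used for every block row, and error checking is omitted):
.vb
      Mat A;
      MatCreateMPISBAIJ(comm,bs,m,n,M,N,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);
      /* set entries of the upper triangular part with MatSetValues(), then */
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
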
1973    Level: intermediate
1974 
1975 .keywords: matrix, block, aij, compressed row, sparse, parallel
1976 
1977 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1978 @*/
1979 
1980 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
1981 {
1982   int ierr,size;
1983 
1984   PetscFunctionBegin;
1985   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1986   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1987   if (size > 1) {
1988     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1989     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1990   } else {
1991     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1992     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1993   }
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2000 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2001 {
2002   Mat          mat;
2003   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2004   int          ierr,len=0;
2005 
2006   PetscFunctionBegin;
2007   *newmat       = 0;
2008   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2009   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
2010   mat->preallocated = PETSC_TRUE;
2011   a = (Mat_MPISBAIJ*)mat->data;
2012   a->bs  = oldmat->bs;
2013   a->bs2 = oldmat->bs2;
2014   a->mbs = oldmat->mbs;
2015   a->nbs = oldmat->nbs;
2016   a->Mbs = oldmat->Mbs;
2017   a->Nbs = oldmat->Nbs;
2018 
2019   a->rstart       = oldmat->rstart;
2020   a->rend         = oldmat->rend;
2021   a->cstart       = oldmat->cstart;
2022   a->cend         = oldmat->cend;
2023   a->size         = oldmat->size;
2024   a->rank         = oldmat->rank;
2025   a->donotstash   = oldmat->donotstash;
2026   a->roworiented  = oldmat->roworiented;
2027   a->rowindices   = 0;
2028   a->rowvalues    = 0;
2029   a->getrowactive = PETSC_FALSE;
2030   a->barray       = 0;
2031   a->rstart_bs    = oldmat->rstart_bs;
2032   a->rend_bs      = oldmat->rend_bs;
2033   a->cstart_bs    = oldmat->cstart_bs;
2034   a->cend_bs      = oldmat->cend_bs;
2035 
2036   /* hash table stuff */
2037   a->ht           = 0;
2038   a->hd           = 0;
2039   a->ht_size      = 0;
2040   a->ht_flag      = oldmat->ht_flag;
2041   a->ht_fact      = oldmat->ht_fact;
2042   a->ht_total_ct  = 0;
2043   a->ht_insert_ct = 0;
2044 
2045   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
2046   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2047   a->cowners    = a->rowners + a->size + 2;
2048   a->rowners_bs = a->cowners + a->size + 2;
2049   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2050   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2051   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2052   if (oldmat->colmap) {
2053 #if defined (PETSC_USE_CTABLE)
2054     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2055 #else
2056     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2057     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2058     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2059 #endif
2060   } else a->colmap = 0;
2061   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2062     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2063     PetscLogObjectMemory(mat,len*sizeof(int));
2064     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2065   } else a->garray = 0;
2066 
2067   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2068   PetscLogObjectParent(mat,a->lvec);
2069   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2070 
2071   PetscLogObjectParent(mat,a->Mvctx);
2072   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2073   PetscLogObjectParent(mat,a->A);
2074   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2075   PetscLogObjectParent(mat,a->B);
2076   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2077   *newmat = mat;
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 #include "petscsys.h"
2082 
2083 #undef __FUNCT__
2084 #define __FUNCT__ "MatLoad_MPISBAIJ"
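/*
   Overall flow: process 0 reads the binary header, the row lengths, the column
   indices and the numerical values, and ships each other process its share; every
   process then counts the diagonal/off-diagonal block nonzeros of its block rows
   (dlens/odlens), preallocates, and inserts its rows with MatSetValues().
*/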
2085 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2086 {
2087   Mat          A;
2088   int          i,nz,ierr,j,rstart,rend,fd;
2089   PetscScalar  *vals,*buf;
2090   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2091   MPI_Status   status;
2092   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2093   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2094   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2095   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2096   int          dcount,kmax,k,nzcount,tmp;
2097 
2098   PetscFunctionBegin;
2099   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2100 
2101   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2102   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2103   if (!rank) {
2104     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2105     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2106     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2107     if (header[3] < 0) {
2108       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2109     }
2110   }
2111 
2112   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2113   M = header[1]; N = header[2];
2114 
2115   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2116 
2117   /*
2118      This code adds extra rows to make sure the number of rows is
2119      divisible by the blocksize
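     (for example, M = 10 with bs = 3 gives Mbs = 4 and extra_rows = 2, so the
      padded matrix has 12 rows)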
2120   */
2121   Mbs        = M/bs;
2122   extra_rows = bs - M + bs*(Mbs);
2123   if (extra_rows == bs) extra_rows = 0;
2124   else                  Mbs++;
2125   if (extra_rows &&!rank) {
2126     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2127   }
2128 
2129   /* determine ownership of all rows */
2130   mbs        = Mbs/size + ((Mbs % size) > rank);
2131   m          = mbs*bs;
2132   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2133   browners   = rowners + size + 1;
2134   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2135   rowners[0] = 0;
2136   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2137   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2138   rstart = rowners[rank];
2139   rend   = rowners[rank+1];
2140 
2141   /* distribute row lengths to all processors */
2142   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2143   if (!rank) {
2144     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2145     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2146     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2147     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2148     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2149     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2150     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2151   } else {
2152     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2153   }
2154 
2155   if (!rank) {   /* procs[0] */
2156     /* calculate the number of nonzeros on each processor */
2157     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2158     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2159     for (i=0; i<size; i++) {
2160       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2161         procsnz[i] += rowlengths[j];
2162       }
2163     }
2164     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2165 
2166     /* determine max buffer needed and allocate it */
2167     maxnz = 0;
2168     for (i=0; i<size; i++) {
2169       maxnz = PetscMax(maxnz,procsnz[i]);
2170     }
2171     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2172 
2173     /* read in my part of the matrix column indices  */
2174     nz     = procsnz[0];
2175     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2176     mycols = ibuf;
2177     if (size == 1)  nz -= extra_rows;
2178     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2179     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2180 
2181     /* read in the parts for all other processes (except the last) and ship them off */
2182     for (i=1; i<size-1; i++) {
2183       nz   = procsnz[i];
2184       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2185       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2186     }
2187     /* read in the stuff for the last proc */
2188     if (size != 1) {
2189       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2190       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2191       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2192       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2193     }
2194     ierr = PetscFree(cols);CHKERRQ(ierr);
2195   } else {  /* procs[i], i>0 */
2196     /* determine buffer space needed for message */
2197     nz = 0;
2198     for (i=0; i<m; i++) {
2199       nz += locrowlens[i];
2200     }
2201     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2202     mycols = ibuf;
2203     /* receive message of column indices*/
2204     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2205     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2206     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2207   }
2208 
2209   /* loop over local rows, determining number of off diagonal entries */
2210   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2211   odlens   = dlens + (rend-rstart);
2212   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2213   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2214   masked1  = mask    + Mbs;
2215   masked2  = masked1 + Mbs;
2216   rowcount = 0; nzcount = 0;
2217   for (i=0; i<mbs; i++) {
2218     dcount  = 0;
2219     odcount = 0;
2220     for (j=0; j<bs; j++) {
2221       kmax = locrowlens[rowcount];
2222       for (k=0; k<kmax; k++) {
2223         tmp = mycols[nzcount++]/bs; /* block col. index */
2224         if (!mask[tmp]) {
2225           mask[tmp] = 1;
2226           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2227           else masked1[dcount++] = tmp; /* entry in diag portion */
2228         }
2229       }
2230       rowcount++;
2231     }
2232 
2233     dlens[i]  = dcount;  /* d_nzz[i] */
2234     odlens[i] = odcount; /* o_nzz[i] */
2235 
2236     /* zero out the mask elements we set */
2237     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2238     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2239   }
2240 
2241   /* create our matrix */
2242   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2243   ierr = MatSetType(A,type);CHKERRQ(ierr);
2244   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2245   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2246 
2247   if (!rank) {
2248     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2249     /* read in my part of the matrix numerical values  */
2250     nz = procsnz[0];
2251     vals = buf;
2252     mycols = ibuf;
2253     if (size == 1)  nz -= extra_rows;
2254     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2255     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2256 
2257     /* insert into matrix */
2258     jj      = rstart*bs;
2259     for (i=0; i<m; i++) {
2260       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2261       mycols += locrowlens[i];
2262       vals   += locrowlens[i];
2263       jj++;
2264     }
2265 
2266     /* read in other processors (except the last one) and ship out */
2267     for (i=1; i<size-1; i++) {
2268       nz   = procsnz[i];
2269       vals = buf;
2270       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2271       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2272     }
2273     /* the last proc */
2274     if (size != 1){
2275       nz   = procsnz[i] - extra_rows;
2276       vals = buf;
2277       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2278       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2279       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2280     }
2281     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2282 
2283   } else {
2284     /* receive numeric values */
2285     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2286 
2287     /* receive message of values*/
2288     vals   = buf;
2289     mycols = ibuf;
2290     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2291     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2292     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2293 
2294     /* insert into matrix */
2295     jj      = rstart*bs;
2296     for (i=0; i<m; i++) {
2297       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2298       mycols += locrowlens[i];
2299       vals   += locrowlens[i];
2300       jj++;
2301     }
2302   }
2303 
2304   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2305   ierr = PetscFree(buf);CHKERRQ(ierr);
2306   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2307   ierr = PetscFree(rowners);CHKERRQ(ierr);
2308   ierr = PetscFree(dlens);CHKERRQ(ierr);
2309   ierr = PetscFree(mask);CHKERRQ(ierr);
2310   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2311   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2312   *newmat = A;
2313   PetscFunctionReturn(0);
2314 }
2315 
2316 #undef __FUNCT__
2317 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2318 /*@
2319    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2320 
2321    Input Parameters:
2322 .  mat  - the matrix
2323 .  fact - factor
2324 
2325    Collective on Mat
2326 
2327    Level: advanced
2328 
2329   Notes:
2330    This can also be set by the command line option: -mat_use_hash_table fact
2331 
2332 .keywords: matrix, hashtable, factor, HT
2333 
2334 .seealso: MatSetOption()
2335 @*/
2336 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2337 {
2338   PetscFunctionBegin;
2339   SETERRQ(1,"Function not yet written for SBAIJ format");
2340   /* PetscFunctionReturn(0); */
2341 }
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
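/*
   Because only the upper triangle is stored, an entry of the off-diagonal block B
   also contributes to the row maximum of the (remote) row matching its global
   column, which is owned by a higher-rank process.  Each process records such
   contributions in a work array indexed by global column and sends the relevant
   segments to their owners, which fold them into v.
*/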
2345 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2346 {
2347   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2348   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2349   PetscReal    atmp;
2350   PetscReal    *work,*svalues,*rvalues;
2351   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2352   int          rank,size,*rowners_bs,dest,count,source;
2353   PetscScalar  *va;
2354   MatScalar    *ba;
2355   MPI_Status   stat;
2356 
2357   PetscFunctionBegin;
2358   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2359   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2360 
2361   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2362   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2363 
2364   bs   = a->bs;
2365   mbs  = a->mbs;
2366   Mbs  = a->Mbs;
2367   ba   = b->a;
2368   bi   = b->i;
2369   bj   = b->j;
2370   /*
2371   PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2372   PetscSynchronizedFlush(A->comm);
2373   */
2374 
2375   /* find ownerships */
2376   rowners_bs = a->rowners_bs;
2377   /*
2378   if (!rank){
2379     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2380   }
2381   */
2382 
2383   /* each proc creates an array to be distributed */
2384   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2385   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2386 
2387   /* row_max for B */
2388   if (rank != size-1){
2389     for (i=0; i<mbs; i++) {
2390       ncols = bi[1] - bi[0]; bi++;
2391       brow  = bs*i;
2392       for (j=0; j<ncols; j++){
2393         bcol = bs*(*bj);
2394         for (kcol=0; kcol<bs; kcol++){
2395           col = bcol + kcol;                 /* local col index */
2396           col += rowners_bs[rank+1];      /* global col index */
2397           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2398           for (krow=0; krow<bs; krow++){
2399             atmp = PetscAbsScalar(*ba); ba++;
2400             row = brow + krow;    /* local row index */
2401             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2402             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2403             if (work[col] < atmp) work[col] = atmp;
2404           }
2405         }
2406         bj++;
2407       }
2408     }
2409     /*
2410       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2411       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2412       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2413       */
2414 
2415     /* send values to its owners */
2416     for (dest=rank+1; dest<size; dest++){
2417       svalues = work + rowners_bs[dest];
2418       count   = rowners_bs[dest+1]-rowners_bs[dest];
2419       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2420       /*
2421       PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2422       PetscSynchronizedFlush(A->comm);
2423       */
2424     }
2425   }
2426 
2427   /* receive values */
2428   if (rank){
2429     rvalues = work;
2430     count   = rowners_bs[rank+1]-rowners_bs[rank];
2431     for (source=0; source<rank; source++){
2432       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2433       /* process values */
2434       for (i=0; i<count; i++){
2435         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2436       }
2437       /*
2438       PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2439       PetscSynchronizedFlush(A->comm);
2440       */
2441     }
2442   }
2443 
2444   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2445   ierr = PetscFree(work);CHKERRQ(ierr);
2446   PetscFunctionReturn(0);
2447 }
2448 
2449 #undef __FUNCT__
2450 #define __FUNCT__ "MatRelax_MPISBAIJ"
2451 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2452 {
2453   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2454   int            ierr,mbs=mat->mbs,bs=mat->bs;
2455   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2456   Vec            bb1;
2457 
2458   PetscFunctionBegin;
2459   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2460   if (bs > 1)
2461     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2462 
2463   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2464     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2465       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2466       its--;
2467     }
2468 
2469     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
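    /* Each sweep assembles the effective local right-hand side bb1 = bb minus the
       coupling through the off-diagonal block B (and, by symmetry, B^T), gathered
       via the sMvctx scatter, and then applies a symmetric sweep on the diagonal
       block A. */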
2470     while (its--){
2471 
2472       /* lower triangular part: slvec0b = - B^T*xx */
2473       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2474 
2475       /* copy xx into slvec0a */
2476       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2477       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2478       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2479       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2480 
2481       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2482 
2483       /* copy bb into slvec1a */
2484       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2485       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2486       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2487       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2488 
2489       /* set slvec1b = 0 */
2490       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2491 
2492       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2493       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2494       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2495       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2496 
2497       /* upper triangular part: bb1 = bb1 - B*x */
2498       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2499 
2500       /* local diagonal sweep */
2501       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2502     }
2503     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2504   } else {
2505     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2506   }
2507   PetscFunctionReturn(0);
2508 }
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2512 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2513 {
2514   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2515   int            ierr;
2516   PetscScalar    mone=-1.0;
2517   Vec            lvec1,bb1;
2518 
2519   PetscFunctionBegin;
2520   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2521   if (mat->bs > 1)
2522     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2523 
2524   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2525     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2526       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2527       its--;
2528     }
2529 
2530     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2531     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2532     while (its--){
2533       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2534 
2535       /* lower diagonal part: bb1 = bb - B^T*xx */
2536       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2537       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2538 
2539       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2540       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2541       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2542 
2543       /* upper diagonal part: bb1 = bb1 - B*x */
2544       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2545       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2546 
2547       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2548 
2549       /* diagonal sweep */
2550       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2551     }
2552     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2553     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2554   } else {
2555     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2556   }
2557   PetscFunctionReturn(0);
2558 }
2559 
2560