xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision b189386232a1004049c31fcd080b472f8d6830ee)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 extern int DisAssemble_MPISBAIJ(Mat);
11 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *);
12 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
14 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
15 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
16 extern int MatPrintHelp_SeqSBAIJ(Mat);
17 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
18 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
19 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
20 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
21 
22 /*  UGLY, ugly, ugly
23    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
24    not exist; it is #defined to the regular routine below. Otherwise ..._MatScalar() takes matrix
25    elements in single precision and inserts them into the single precision data structure. The
26    function MatSetValuesBlocked_MPISBAIJ() converts the entries into single precision and then
27    calls ..._MatScalar() to put them into the single precision data structures.
28 */
29 #if defined(PETSC_USE_MAT_SINGLE)
30 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
32 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
33 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
35 #else
36 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
37 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
38 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
39 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
40 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
41 #endif
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
46 int MatStoreValues_MPISBAIJ(Mat mat)
47 {
48   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
49   int          ierr;
50 
51   PetscFunctionBegin;
52   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
53   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
54   PetscFunctionReturn(0);
55 }
56 EXTERN_C_END
57 
58 EXTERN_C_BEGIN
59 #undef __FUNCT__
60 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
61 int MatRetrieveValues_MPISBAIJ(Mat mat)
62 {
63   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
64   int          ierr;
65 
66   PetscFunctionBegin;
67   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
68   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
69   PetscFunctionReturn(0);
70 }
71 EXTERN_C_END
72 
73 /*
74      Local utility routine that creates a mapping from the global column
75    number to the local number in the off-diagonal part of the local
76    storage of the matrix.  This is done in a non-scalable way since the
77    length of colmap equals the global matrix length.
78 */
79 #undef __FUNCT__
80 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private"
81 static int CreateColmap_MPISBAIJ_Private(Mat mat)
82 {
83   PetscFunctionBegin;
84   SETERRQ(1,"Function not yet written for SBAIJ format");
85   /* PetscFunctionReturn(0); */
86 }
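/*
   Illustrative sketch (added; not part of the original source): once written, the routine above
   would presumably follow the MPIBAIJ convention, where colmap[g] holds 1 + the local block-column
   index in B for global block column g (0 meaning "no off-diagonal entries in that block column on
   this process"), so lookups elsewhere in this file read

       col = baij->colmap[in[j]/bs] - 1;                                 (plain array)
       PetscTableFind(baij->colmap,in[j]/bs + 1,&col); col = col - 1;    (PETSC_USE_CTABLE)
*/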
87 
88 #define CHUNKSIZE  10
89 
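/*
   Descriptive note (added): the two macros below insert a single scalar entry into the local
   "diagonal" block matrix A (MatSetValues_SeqSBAIJ_A_Private) or the local off-diagonal block
   matrix B (MatSetValues_SeqSBAIJ_B_Private).  Each macro locates the block column within the
   block row by a short binary search plus a linear scan, adds or overwrites the value if the
   block is already present, and otherwise shifts the trailing blocks up and inserts a new block.
   When the block row is full, the i/j/a arrays are reallocated with CHUNKSIZE extra block slots.
*/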
90 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
91 { \
92  \
93     brow = row/bs;  \
94     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
95     rmax = aimax[brow]; nrow = ailen[brow]; \
96       bcol = col/bs; \
97       ridx = row % bs; cidx = col % bs; \
98       low = 0; high = nrow; \
99       while (high-low > 3) { \
100         t = (low+high)/2; \
101         if (rp[t] > bcol) high = t; \
102         else              low  = t; \
103       } \
104       for (_i=low; _i<high; _i++) { \
105         if (rp[_i] > bcol) break; \
106         if (rp[_i] == bcol) { \
107           bap  = ap +  bs2*_i + bs*cidx + ridx; \
108           if (addv == ADD_VALUES) *bap += value;  \
109           else                    *bap  = value;  \
110           goto a_noinsert; \
111         } \
112       } \
113       if (a->nonew == 1) goto a_noinsert; \
114       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
115       if (nrow >= rmax) { \
116         /* there is no extra room in row, therefore enlarge */ \
117         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
118         MatScalar *new_a; \
119  \
120         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
121  \
122         /* malloc new storage space */ \
123         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
124         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
125         new_j = (int*)(new_a + bs2*new_nz); \
126         new_i = new_j + new_nz; \
127  \
128         /* copy over old data into new slots */ \
129         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
130         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
131         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
132         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
133         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
134         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
135         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
136         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
137                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
138         /* free up old matrix storage */ \
139         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
140         if (!a->singlemalloc) { \
141           ierr = PetscFree(a->i);CHKERRQ(ierr); \
142           ierr = PetscFree(a->j);CHKERRQ(ierr);\
143         } \
144         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
145         a->singlemalloc = PETSC_TRUE; \
146  \
147         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
148         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
149         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
150         a->s_maxnz += bs2*CHUNKSIZE; \
151         a->reallocs++; \
152         a->s_nz++; \
153       } \
154       N = nrow++ - 1;  \
155       /* shift up all the later entries in this row */ \
156       for (ii=N; ii>=_i; ii--) { \
157         rp[ii+1] = rp[ii]; \
158         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
159       } \
160       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
161       rp[_i]                      = bcol;  \
162       ap[bs2*_i + bs*cidx + ridx] = value;  \
163       a_noinsert:; \
164     ailen[brow] = nrow; \
165 }
166 #ifndef MatSetValues_SeqSBAIJ_B_Private
167 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
168 { \
169     brow = row/bs;  \
170     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
171     rmax = bimax[brow]; nrow = bilen[brow]; \
172       bcol = col/bs; \
173       ridx = row % bs; cidx = col % bs; \
174       low = 0; high = nrow; \
175       while (high-low > 3) { \
176         t = (low+high)/2; \
177         if (rp[t] > bcol) high = t; \
178         else              low  = t; \
179       } \
180       for (_i=low; _i<high; _i++) { \
181         if (rp[_i] > bcol) break; \
182         if (rp[_i] == bcol) { \
183           bap  = ap +  bs2*_i + bs*cidx + ridx; \
184           if (addv == ADD_VALUES) *bap += value;  \
185           else                    *bap  = value;  \
186           goto b_noinsert; \
187         } \
188       } \
189       if (b->nonew == 1) goto b_noinsert; \
190       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
191       if (nrow >= rmax) { \
192         /* there is no extra room in row, therefore enlarge */ \
193         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
194         MatScalar *new_a; \
195  \
196         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
197  \
198         /* malloc new storage space */ \
199         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
200         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
201         new_j = (int*)(new_a + bs2*new_nz); \
202         new_i = new_j + new_nz; \
203  \
204         /* copy over old data into new slots */ \
205         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
206         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
207         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
208         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
209         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
210         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
211         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
212         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
213                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
214         /* free up old matrix storage */ \
215         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
216         if (!b->singlemalloc) { \
217           ierr = PetscFree(b->i);CHKERRQ(ierr); \
218           ierr = PetscFree(b->j);CHKERRQ(ierr); \
219         } \
220         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
221         b->singlemalloc = PETSC_TRUE; \
222  \
223         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
224         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
225         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
226         b->maxnz += bs2*CHUNKSIZE; \
227         b->reallocs++; \
228         b->nz++; \
229       } \
230       N = nrow++ - 1;  \
231       /* shift up all the later entries in this row */ \
232       for (ii=N; ii>=_i; ii--) { \
233         rp[ii+1] = rp[ii]; \
234         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
235       } \
236       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
237       rp[_i]                      = bcol;  \
238       ap[bs2*_i + bs*cidx + ridx] = value;  \
239       b_noinsert:; \
240     bilen[brow] = nrow; \
241 }
242 #endif
243 
244 #if defined(PETSC_USE_MAT_SINGLE)
245 #undef __FUNCT__
246 #define __FUNCT__ "MatSetValues_MPISBAIJ"
247 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
248 {
249   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
250   int          ierr,i,N = m*n;
251   MatScalar    *vsingle;
252 
253   PetscFunctionBegin;
254   if (N > b->setvalueslen) {
255     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
256     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
257     b->setvalueslen  = N;
258   }
259   vsingle = b->setvaluescopy;
260 
261   for (i=0; i<N; i++) {
262     vsingle[i] = v[i];
263   }
264   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
265   PetscFunctionReturn(0);
266 }
267 
268 #undef __FUNCT__
269 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
270 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
271 {
272   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
273   int         ierr,i,N = m*n*b->bs2;
274   MatScalar   *vsingle;
275 
276   PetscFunctionBegin;
277   if (N > b->setvalueslen) {
278     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
279     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
280     b->setvalueslen  = N;
281   }
282   vsingle = b->setvaluescopy;
283   for (i=0; i<N; i++) {
284     vsingle[i] = v[i];
285   }
286   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
287   PetscFunctionReturn(0);
288 }
289 
290 #undef __FUNCT__
291 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
292 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
293 {
294   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
295   int         ierr,i,N = m*n;
296   MatScalar   *vsingle;
297 
298   PetscFunctionBegin;
299   SETERRQ(1,"Function not yet written for SBAIJ format");
300   /* PetscFunctionReturn(0); */
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
305 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
306 {
307   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
308   int         ierr,i,N = m*n*b->bs2;
309   MatScalar   *vsingle;
310 
311   PetscFunctionBegin;
312   SETERRQ(1,"Function not yet written for SBAIJ format");
313   /* PetscFunctionReturn(0); */
314 }
315 #endif
316 
317 /* Only add/insert a(i,j) with i<=j (blocks).
318    Any a(i,j) with i>j input by the user is ignored.
319 */
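/*
   Usage sketch (added; illustrative only, assuming 'mat' is an already created and preallocated
   MPISBAIJ matrix with block size 1): callers need only supply the upper triangle; the second
   call below, which addresses an entry below the diagonal, is silently ignored (for block
   size > 1 the test is applied block-wise).

       int         ierr,i = 0,j = 2;
       PetscScalar v = 1.0;
       ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatSetValues(mat,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/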
320 #undef __FUNCT__
321 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
322 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
323 {
324   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
325   MatScalar    value;
326   PetscTruth   roworiented = baij->roworiented;
327   int          ierr,i,j,row,col;
328   int          rstart_orig=baij->rstart_bs;
329   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
330   int          cend_orig=baij->cend_bs,bs=baij->bs;
331 
332   /* Some Variables required in the macro */
333   Mat          A = baij->A;
334   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
335   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
336   MatScalar    *aa=a->a;
337 
338   Mat          B = baij->B;
339   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
340   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
341   MatScalar    *ba=b->a;
342 
343   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
344   int          low,high,t,ridx,cidx,bs2=a->bs2;
345   MatScalar    *ap,*bap;
346 
347   /* for stash */
348   int          n_loc, *in_loc=0;
349   MatScalar    *v_loc=0;
350 
351   PetscFunctionBegin;
352 
353   if(!baij->donotstash){
354     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
355     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
356   }
357 
358   for (i=0; i<m; i++) {
359     if (im[i] < 0) continue;
360 #if defined(PETSC_USE_BOPT_g)
361     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
362 #endif
363     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
364       row = im[i] - rstart_orig;              /* local row index */
365       for (j=0; j<n; j++) {
366         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
367         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
368           col = in[j] - cstart_orig;          /* local col index */
369           brow = row/bs; bcol = col/bs;
370           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
371           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
372           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
373           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
374         } else if (in[j] < 0) continue;
375 #if defined(PETSC_USE_BOPT_g)
376         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");}
377 #endif
378         else {  /* off-diag entry (B) */
379           if (mat->was_assembled) {
380             if (!baij->colmap) {
381               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
382             }
383 #if defined (PETSC_USE_CTABLE)
384             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
385             col  = col - 1;
386 #else
387             col = baij->colmap[in[j]/bs] - 1;
388 #endif
389             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
390               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
391               col =  in[j];
392               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
393               B = baij->B;
394               b = (Mat_SeqBAIJ*)(B)->data;
395               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
396               ba=b->a;
397             } else col += in[j]%bs;
398           } else col = in[j];
399           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
400           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
401           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
402         }
403       }
404     } else {  /* off processor entry */
405       if (!baij->donotstash) {
406         n_loc = 0;
407         for (j=0; j<n; j++){
408           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
409           in_loc[n_loc] = in[j];
410           if (roworiented) {
411             v_loc[n_loc] = v[i*n+j];
412           } else {
413             v_loc[n_loc] = v[j*m+i];
414           }
415           n_loc++;
416         }
417         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
418       }
419     }
420   }
421 
422   if(!baij->donotstash){
423     ierr = PetscFree(in_loc);CHKERRQ(ierr);
424     ierr = PetscFree(v_loc);CHKERRQ(ierr);
425   }
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
431 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
432 {
433   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
434   MatScalar    *value,*barray=baij->barray;
435   PetscTruth   roworiented = baij->roworiented;
436   int          ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
437   int          rend=baij->rend,cstart=baij->cstart,stepval;
438   int          cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
439 
440   PetscFunctionBegin;
441   if(!barray) {
442     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
443     baij->barray = barray;
444   }
445 
446   if (roworiented) {
447     stepval = (n-1)*bs;
448   } else {
449     stepval = (m-1)*bs;
450   }
451   for (i=0; i<m; i++) {
452     if (im[i] < 0) continue;
453 #if defined(PETSC_USE_BOPT_g)
454     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
455 #endif
456     if (im[i] >= rstart && im[i] < rend) {
457       row = im[i] - rstart;
458       for (j=0; j<n; j++) {
459         /* If NumCol = 1 then a copy is not required */
460         if ((roworiented) && (n == 1)) {
461           barray = v + i*bs2;
462         } else if((!roworiented) && (m == 1)) {
463           barray = v + j*bs2;
464         } else { /* Here a copy is required */
465           if (roworiented) {
466             value = v + i*(stepval+bs)*bs + j*bs;
467           } else {
468             value = v + j*(stepval+bs)*bs + i*bs;
469           }
470           for (ii=0; ii<bs; ii++,value+=stepval) {
471             for (jj=0; jj<bs; jj++) {
472               *barray++  = *value++;
473             }
474           }
475           barray -=bs2;
476         }
477 
478         if (in[j] >= cstart && in[j] < cend){
479           col  = in[j] - cstart;
480           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
481         }
482         else if (in[j] < 0) continue;
483 #if defined(PETSC_USE_BOPT_g)
484         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
485 #endif
486         else {
487           if (mat->was_assembled) {
488             if (!baij->colmap) {
489               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
490             }
491 
492 #if defined(PETSC_USE_BOPT_g)
493 #if defined (PETSC_USE_CTABLE)
494             { int data;
495               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
496               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
497             }
498 #else
499             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
500 #endif
501 #endif
502 #if defined (PETSC_USE_CTABLE)
503 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
504             col  = (col - 1)/bs;
505 #else
506             col = (baij->colmap[in[j]] - 1)/bs;
507 #endif
508             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
509               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
510               col =  in[j];
511             }
512           }
513           else col = in[j];
514           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
515         }
516       }
517     } else {
518       if (!baij->donotstash) {
519         if (roworiented) {
520           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
521         } else {
522           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
523         }
524       }
525     }
526   }
527   PetscFunctionReturn(0);
528 }
529 
530 #define HASH_KEY 0.6180339887
531 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
532 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
533 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
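/*
   Worked example (added): HASH is multiplicative hashing with the golden-ratio constant; it maps a
   key to the fractional part of key*HASH_KEY scaled by the table size.  For size = 7 and key = 10:
   tmp = 10*0.6180339887 = 6.180..., fractional part = 0.180..., slot = (int)(7*0.180...) = 1.
*/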
534 #undef __FUNCT__
535 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
536 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
537 {
538   PetscFunctionBegin;
539   SETERRQ(1,"Function not yet written for SBAIJ format");
540   /* PetscFunctionReturn(0); */
541 }
542 
543 #undef __FUNCT__
544 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
545 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
546 {
547   PetscFunctionBegin;
548   SETERRQ(1,"Function not yet written for SBAIJ format");
549   /* PetscFunctionReturn(0); */
550 }
551 
552 #undef __FUNCT__
553 #define __FUNCT__ "MatGetValues_MPISBAIJ"
554 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
555 {
556   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
557   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
558   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
559 
560   PetscFunctionBegin;
561   for (i=0; i<m; i++) {
562     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
563     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
564     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
565       row = idxm[i] - bsrstart;
566       for (j=0; j<n; j++) {
567         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
568         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
569         if (idxn[j] >= bscstart && idxn[j] < bscend){
570           col = idxn[j] - bscstart;
571           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
572         } else {
573           if (!baij->colmap) {
574             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
575           }
576 #if defined (PETSC_USE_CTABLE)
577           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
578           data --;
579 #else
580           data = baij->colmap[idxn[j]/bs]-1;
581 #endif
582           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
583           else {
584             col  = data + idxn[j]%bs;
585             ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
586           }
587         }
588       }
589     } else {
590       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
591     }
592   }
593  PetscFunctionReturn(0);
594 }
595 
596 #undef __FUNCT__
597 #define __FUNCT__ "MatNorm_MPISBAIJ"
598 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
599 {
600   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
601   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
602   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
603   int        ierr;
604   PetscReal  sum[2],*lnorm2;
605 
606   PetscFunctionBegin;
607   if (baij->size == 1) {
608     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
609   } else {
610     if (type == NORM_FROBENIUS) {
611       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
612       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
613       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
614       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
615       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
616       /*
617       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
618       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
619       */
620       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
621       /*
622       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
623       PetscSynchronizedFlush(mat->comm); */
624 
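      /* Each process stores its diagonal block in A but only the upper off-diagonal blocks in B;
         the unstored lower off-diagonal part contributes a second copy of ||B||_F^2, hence the
         factor of 2 below. */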
625       *norm = sqrt(sum[0] + 2*sum[1]);
626       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
627     } else {
628       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
629     }
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 /*
635   Creates the hash table and fills in its entries.
636   This table is created only once.
637   If new entries need to be added to the matrix
638   then the hash table has to be destroyed and
639   recreated.
640 */
641 #undef __FUNCT__
642 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
643 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
644 {
645   PetscFunctionBegin;
646   SETERRQ(1,"Function not yet written for SBAIJ format");
647   /* PetscFunctionReturn(0); */
648 }
649 
650 #undef __FUNCT__
651 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
652 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
653 {
654   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
655   int         ierr,nstash,reallocs;
656   InsertMode  addv;
657 
658   PetscFunctionBegin;
659   if (baij->donotstash) {
660     PetscFunctionReturn(0);
661   }
662 
663   /* make sure all processors are either in INSERTMODE or ADDMODE */
664   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
665   if (addv == (ADD_VALUES|INSERT_VALUES)) {
666     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
667   }
668   mat->insertmode = addv; /* in case this processor had no cache */
669 
670   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
671   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
672   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
673   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
674   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
675   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
676   PetscFunctionReturn(0);
677 }
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
681 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
682 {
683   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
684   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
685   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
686   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
687   int         *row,*col,other_disassembled;
688   PetscTruth  r1,r2,r3;
689   MatScalar   *val;
690   InsertMode  addv = mat->insertmode;
691 #if defined(PETSC_HAVE_SPOOLES)
692   PetscTruth  flag;
693 #endif
694 
695   PetscFunctionBegin;
696 
697   if (!baij->donotstash) {
698     while (1) {
699       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
700       /*
701       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
702       PetscSynchronizedFlush(mat->comm);
703       */
704       if (!flg) break;
705 
706       for (i=0; i<n;) {
707         /* Now identify the consecutive vals belonging to the same row */
708         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
713         i = j;
714       }
715     }
716     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
717     /* Now process the block-stash. Since the values are stashed column-oriented,
718        temporarily clear the roworiented flags, and after MatSetValues()
719        restore the original flags */
720     r1 = baij->roworiented;
721     r2 = a->roworiented;
722     r3 = b->roworiented;
723     baij->roworiented = PETSC_FALSE;
724     a->roworiented    = PETSC_FALSE;
725     b->roworiented    = PETSC_FALSE;
726     while (1) {
727       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
728       if (!flg) break;
729 
730       for (i=0; i<n;) {
731         /* Now identify the consecutive vals belonging to the same row */
732         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
733         if (j < n) ncols = j-i;
734         else       ncols = n-i;
735         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
736         i = j;
737       }
738     }
739     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
740     baij->roworiented = r1;
741     a->roworiented    = r2;
742     b->roworiented    = r3;
743   }
744 
745   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
746   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
747 
748   /* determine if any processor has disassembled, if so we must
749      also disassemble ourselves, in order that we may reassemble. */
750   /*
751      if nonzero structure of submatrix B cannot change then we know that
752      no processor disassembled thus we can skip this stuff
753   */
754   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
755     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
756     if (mat->was_assembled && !other_disassembled) {
757       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
758     }
759   }
760 
761   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
762     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
763   }
764   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
765   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
766 
767 #if defined(PETSC_USE_BOPT_g)
768   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
769     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
770     baij->ht_total_ct  = 0;
771     baij->ht_insert_ct = 0;
772   }
773 #endif
774   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
775     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
776     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
777     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
778   }
779 
780   if (baij->rowvalues) {
781     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
782     baij->rowvalues = 0;
783   }
784 
785 #if defined(PETSC_HAVE_SPOOLES)
786   ierr = PetscOptionsHasName(mat->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
787   if (flag) { ierr = MatUseSpooles_MPISBAIJ(mat);CHKERRQ(ierr); }
788 #endif
789   PetscFunctionReturn(0);
790 }
791 
792 #undef __FUNCT__
793 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
794 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
795 {
796   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
797   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
798   PetscTruth        isascii,isdraw;
799   PetscViewer       sviewer;
800   PetscViewerFormat format;
801 
802   PetscFunctionBegin;
803   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
804   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
805   if (isascii) {
806     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
807     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
808       MatInfo info;
809       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
810       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
811       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
812               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
813               baij->bs,(int)info.memory);CHKERRQ(ierr);
814       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
815       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
816       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
817       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
818       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
819       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
820       PetscFunctionReturn(0);
821     } else if (format == PETSC_VIEWER_ASCII_INFO) {
822       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
823       PetscFunctionReturn(0);
824     }
825   }
826 
827   if (isdraw) {
828     PetscDraw       draw;
829     PetscTruth isnull;
830     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
831     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
832   }
833 
834   if (size == 1) {
835     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
836     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
837   } else {
838     /* assemble the entire matrix onto first processor. */
839     Mat         A;
840     Mat_SeqSBAIJ *Aloc;
841     Mat_SeqBAIJ *Bloc;
842     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
843     MatScalar   *a;
844 
845     if (!rank) {
846       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
847     } else {
848       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
849     }
850     PetscLogObjectParent(mat,A);
851 
852     /* copy over the A part */
853     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
854     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
855     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
856 
857     for (i=0; i<mbs; i++) {
858       rvals[0] = bs*(baij->rstart + i);
859       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
860       for (j=ai[i]; j<ai[i+1]; j++) {
861         col = (baij->cstart+aj[j])*bs;
862         for (k=0; k<bs; k++) {
863           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
864           col++; a += bs;
865         }
866       }
867     }
868     /* copy over the B part */
869     Bloc = (Mat_SeqBAIJ*)baij->B->data;
870     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
871     for (i=0; i<mbs; i++) {
872       rvals[0] = bs*(baij->rstart + i);
873       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
874       for (j=ai[i]; j<ai[i+1]; j++) {
875         col = baij->garray[aj[j]]*bs;
876         for (k=0; k<bs; k++) {
877           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
878           col++; a += bs;
879         }
880       }
881     }
882     ierr = PetscFree(rvals);CHKERRQ(ierr);
883     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
884     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
885     /*
886        Everyone has to participate in the call that draws the matrix since the graphics
887        waits are synchronized across all processors that share the PetscDraw object
888     */
889     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
890     if (!rank) {
891       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
892       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
893     }
894     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
895     ierr = MatDestroy(A);CHKERRQ(ierr);
896   }
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatView_MPISBAIJ"
902 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
903 {
904   int        ierr;
905   PetscTruth isascii,isdraw,issocket,isbinary;
906 
907   PetscFunctionBegin;
908   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
909   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
910   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
911   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
912   if (isascii || isdraw || issocket || isbinary) {
913     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
914   } else {
915     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
916   }
917   PetscFunctionReturn(0);
918 }
919 
920 #undef __FUNCT__
921 #define __FUNCT__ "MatDestroy_MPISBAIJ"
922 int MatDestroy_MPISBAIJ(Mat mat)
923 {
924   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
925   int         ierr;
926 
927   PetscFunctionBegin;
928 #if defined(PETSC_USE_LOG)
929   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
930 #endif
931   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
932   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
933   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
934   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
935   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
936 #if defined (PETSC_USE_CTABLE)
937   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
938 #else
939   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
940 #endif
941   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
942   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
943   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
944   if (baij->slvec0) {
945     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
946     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
947   }
948   if (baij->slvec1) {
949     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
950     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
951     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
952   }
953   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
954   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
955   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
956   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
957 #if defined(PETSC_USE_MAT_SINGLE)
958   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
959 #endif
960   ierr = PetscFree(baij);CHKERRQ(ierr);
961   PetscFunctionReturn(0);
962 }
963 
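/*
   Descriptive note (added): with the symmetric storage only the upper off-diagonal blocks B are
   kept, so y = M*x is assembled as  y = A*x + B*xghost + B^T*x.  The routine below computes A*x
   into slvec1a and B^T*x into slvec0b, copies x into the leading part of slvec0 so that
   slvec0 = [x ; B^T*x], does a single scatter-add of slvec0 into slvec1 (communicating the ghost
   values of x and the remote B^T*x contributions in one pass), and finally forms
   y = B*slvec1b + slvec1a.
*/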
964 #undef __FUNCT__
965 #define __FUNCT__ "MatMult_MPISBAIJ"
966 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
967 {
968   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
969   int         ierr,nt,mbs=a->mbs,bs=a->bs;
970   PetscScalar *x,*from,zero=0.0;
971 
972   PetscFunctionBegin;
973   /*
974   PetscSynchronizedPrintf(A->comm," _1comm is called ...\n");
975   PetscSynchronizedFlush(A->comm);
976   */
977   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
978   if (nt != A->n) {
979     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
980   }
981   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
982   if (nt != A->m) {
983     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
984   }
985 
986   /* diagonal part */
987   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
988   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
989 
990   /* subdiagonal part */
991   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
992 
993   /* copy x into the vec slvec0 */
994   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
995   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
996   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
997   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
998 
999   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1000   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1002 
1003   /* superdiagonal part */
1004   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1005 
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1011 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1012 {
1013   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1014   int         ierr,nt;
1015 
1016   PetscFunctionBegin;
1017   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1018   if (nt != A->n) {
1019     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1020   }
1021   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1022   if (nt != A->m) {
1023     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1024   }
1025 
1026   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1027   /* do diagonal part */
1028   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1029   /* do superdiagonal part */
1030   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1031   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1032   /* do subdiagonal part */
1033   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1034   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1035   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1036 
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 #undef __FUNCT__
1041 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1042 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1043 {
1044   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1045   int          ierr,mbs=a->mbs,bs=a->bs;
1046   PetscScalar  *x,*from,zero=0.0;
1047 
1048   PetscFunctionBegin;
1049   /*
1050   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1051   PetscSynchronizedFlush(A->comm);
1052   */
1053   /* diagonal part */
1054   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1055   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1056 
1057   /* subdiagonal part */
1058   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1059 
1060   /* copy x into the vec slvec0 */
1061   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1062   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1063   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1064   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1065 
1066   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1067   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1068   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1069 
1070   /* superdiagonal part */
1071   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1072 
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1078 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1079 {
1080   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1081   int        ierr;
1082 
1083   PetscFunctionBegin;
1084   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1085   /* do diagonal part */
1086   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1087   /* do superdiagonal part */
1088   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1089   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1090 
1091   /* do subdiagonal part */
1092   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1093   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1094   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1095 
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1101 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1102 {
1103   PetscFunctionBegin;
1104   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
1105   /* PetscFunctionReturn(0); */
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1110 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   PetscFunctionBegin;
1113   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
1114   /* PetscFunctionReturn(0); */
1115 }
1116 
1117 /*
1118   This only works correctly for square matrices where the subblock A->A is the
1119    diagonal block
1120 */
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1123 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1124 {
1125   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1126   int         ierr;
1127 
1128   PetscFunctionBegin;
1129   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1130   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatScale_MPISBAIJ"
1136 int MatScale_MPISBAIJ(PetscScalar *aa,Mat A)
1137 {
1138   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1139   int         ierr;
1140 
1141   PetscFunctionBegin;
1142   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1143   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 #undef __FUNCT__
1148 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1149 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1150 {
1151   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1152   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1153   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1154   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1155   int            *cmap,*idx_p,cstart = mat->cstart;
1156 
1157   PetscFunctionBegin;
1158   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1159   mat->getrowactive = PETSC_TRUE;
1160 
1161   if (!mat->rowvalues && (idx || v)) {
1162     /*
1163         allocate enough space to hold information from the longest row.
1164     */
1165     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1166     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1167     int     max = 1,mbs = mat->mbs,tmp;
1168     for (i=0; i<mbs; i++) {
1169       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1170       if (max < tmp) { max = tmp; }
1171     }
1172     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1173     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1174   }
1175 
1176   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1177   lrow = row - brstart;  /* local row index */
1178 
1179   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1180   if (!v)   {pvA = 0; pvB = 0;}
1181   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1182   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1183   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1184   nztot = nzA + nzB;
1185 
1186   cmap  = mat->garray;
1187   if (v  || idx) {
1188     if (nztot) {
1189       /* Sort by increasing column numbers, assuming A and B already sorted */
1190       int imark = -1;
1191       if (v) {
1192         *v = v_p = mat->rowvalues;
1193         for (i=0; i<nzB; i++) {
1194           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1195           else break;
1196         }
1197         imark = i;
1198         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1199         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1200       }
1201       if (idx) {
1202         *idx = idx_p = mat->rowindices;
1203         if (imark > -1) {
1204           for (i=0; i<imark; i++) {
1205             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1206           }
1207         } else {
1208           for (i=0; i<nzB; i++) {
1209             if (cmap[cworkB[i]/bs] < cstart)
1210               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1211             else break;
1212           }
1213           imark = i;
1214         }
1215         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1216         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1217       }
1218     } else {
1219       if (idx) *idx = 0;
1220       if (v)   *v   = 0;
1221     }
1222   }
1223   *nz = nztot;
1224   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1225   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1231 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1232 {
1233   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1234 
1235   PetscFunctionBegin;
1236   if (baij->getrowactive == PETSC_FALSE) {
1237     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1238   }
1239   baij->getrowactive = PETSC_FALSE;
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNCT__
1244 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1245 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1246 {
1247   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1248 
1249   PetscFunctionBegin;
1250   *bs = baij->bs;
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1256 int MatZeroEntries_MPISBAIJ(Mat A)
1257 {
1258   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1259   int         ierr;
1260 
1261   PetscFunctionBegin;
1262   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1263   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1269 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1270 {
1271   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1272   Mat         A = a->A,B = a->B;
1273   int         ierr;
1274   PetscReal   isend[5],irecv[5];
1275 
1276   PetscFunctionBegin;
1277   info->block_size     = (PetscReal)a->bs;
1278   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1279   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1280   isend[3] = info->memory;  isend[4] = info->mallocs;
1281   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1282   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1283   isend[3] += info->memory;  isend[4] += info->mallocs;
1284   if (flag == MAT_LOCAL) {
1285     info->nz_used      = isend[0];
1286     info->nz_allocated = isend[1];
1287     info->nz_unneeded  = isend[2];
1288     info->memory       = isend[3];
1289     info->mallocs      = isend[4];
1290   } else if (flag == MAT_GLOBAL_MAX) {
1291     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1292     info->nz_used      = irecv[0];
1293     info->nz_allocated = irecv[1];
1294     info->nz_unneeded  = irecv[2];
1295     info->memory       = irecv[3];
1296     info->mallocs      = irecv[4];
1297   } else if (flag == MAT_GLOBAL_SUM) {
1298     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1299     info->nz_used      = irecv[0];
1300     info->nz_allocated = irecv[1];
1301     info->nz_unneeded  = irecv[2];
1302     info->memory       = irecv[3];
1303     info->mallocs      = irecv[4];
1304   } else {
1305     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1306   }
1307   info->rows_global       = (PetscReal)A->M;
1308   info->columns_global    = (PetscReal)A->N;
1309   info->rows_local        = (PetscReal)A->m;
1310   info->columns_local     = (PetscReal)A->N;
1311   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1312   info->fill_ratio_needed = 0;
1313   info->factor_mallocs    = 0;
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1319 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1320 {
1321   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1322   int         ierr;
1323 
1324   PetscFunctionBegin;
1325   switch (op) {
1326   case MAT_NO_NEW_NONZERO_LOCATIONS:
1327   case MAT_YES_NEW_NONZERO_LOCATIONS:
1328   case MAT_COLUMNS_UNSORTED:
1329   case MAT_COLUMNS_SORTED:
1330   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1331   case MAT_KEEP_ZEROED_ROWS:
1332   case MAT_NEW_NONZERO_LOCATION_ERR:
1333     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1334     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1335     break;
1336   case MAT_ROW_ORIENTED:
1337     a->roworiented = PETSC_TRUE;
1338     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1339     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1340     break;
1341   case MAT_ROWS_SORTED:
1342   case MAT_ROWS_UNSORTED:
1343   case MAT_YES_NEW_DIAGONALS:
1344     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1345     break;
1346   case MAT_COLUMN_ORIENTED:
1347     a->roworiented = PETSC_FALSE;
1348     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1349     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1350     break;
1351   case MAT_IGNORE_OFF_PROC_ENTRIES:
1352     a->donotstash = PETSC_TRUE;
1353     break;
1354   case MAT_NO_NEW_DIAGONALS:
1355     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1356   case MAT_USE_HASH_TABLE:
1357     a->ht_flag = PETSC_TRUE;
1358     break;
1359   default:
1360     SETERRQ(PETSC_ERR_SUP,"unknown option");
1361   }
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1367 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1368 {
1369   int ierr;
1370   PetscFunctionBegin;
1371   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1377 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1378 {
1379   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1380   Mat         a = baij->A,b = baij->B;
1381   int         ierr,s1,s2,s3;
1382 
1383   PetscFunctionBegin;
1384   if (ll != rr) {
1385     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For the symmetric format, left and right scaling vectors must be the same\n");
1386   }
1387   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1388   if (rr) {
1389     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1390     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1391     /* Overlap communication with computation. */
1392     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1393 
1394     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1395     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1396     /* left scale the off-diagonal block */
1397     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1398 
1399     /* scale the diagonal block */
1400     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1401 
1402     /* complete the scatter, then right scale the off-diagonal block */
1403     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1404     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1405   }
1406 
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1412 int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag)
1413 {
1414   PetscFunctionBegin;
1415   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1416 }
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1420 int MatPrintHelp_MPISBAIJ(Mat A)
1421 {
1422   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1423   MPI_Comm    comm = A->comm;
1424   static int  called = 0;
1425   int         ierr;
1426 
1427   PetscFunctionBegin;
1428   if (!a->rank) {
1429     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1430   }
1431   if (called) {PetscFunctionReturn(0);} else called = 1;
1432   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1433   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hash table for efficient matrix assembly\n");CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1439 int MatSetUnfactored_MPISBAIJ(Mat A)
1440 {
1441   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1442   int         ierr;
1443 
1444   PetscFunctionBegin;
1445   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1446   PetscFunctionReturn(0);
1447 }
1448 
1449 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatEqual_MPISBAIJ"
1453 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1454 {
1455   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1456   Mat         a,b,c,d;
1457   PetscTruth  flg;
1458   int         ierr;
1459 
1460   PetscFunctionBegin;
1461   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr);
1462   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1463   a = matA->A; b = matA->B;
1464   c = matB->A; d = matB->B;
1465 
1466   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1467   if (flg == PETSC_TRUE) {
1468     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1469   }
1470   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1476 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1477 {
1478   int        ierr;
1479 
1480   PetscFunctionBegin;
1481   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1482   PetscFunctionReturn(0);
1483 }
1484 /* -------------------------------------------------------------------*/
1485 static struct _MatOps MatOps_Values = {
1486   MatSetValues_MPISBAIJ,
1487   MatGetRow_MPISBAIJ,
1488   MatRestoreRow_MPISBAIJ,
1489   MatMult_MPISBAIJ,
1490   MatMultAdd_MPISBAIJ,
1491   MatMultTranspose_MPISBAIJ,
1492   MatMultTransposeAdd_MPISBAIJ,
1493   0,
1494   0,
1495   0,
1496   0,
1497   0,
1498   0,
1499   MatRelax_MPISBAIJ,
1500   MatTranspose_MPISBAIJ,
1501   MatGetInfo_MPISBAIJ,
1502   MatEqual_MPISBAIJ,
1503   MatGetDiagonal_MPISBAIJ,
1504   MatDiagonalScale_MPISBAIJ,
1505   MatNorm_MPISBAIJ,
1506   MatAssemblyBegin_MPISBAIJ,
1507   MatAssemblyEnd_MPISBAIJ,
1508   0,
1509   MatSetOption_MPISBAIJ,
1510   MatZeroEntries_MPISBAIJ,
1511   MatZeroRows_MPISBAIJ,
1512   0,
1513   0,
1514   0,
1515   0,
1516   MatSetUpPreallocation_MPISBAIJ,
1517   0,
1518   0,
1519   0,
1520   0,
1521   MatDuplicate_MPISBAIJ,
1522   0,
1523   0,
1524   0,
1525   0,
1526   0,
1527   0,
1528   0,
1529   MatGetValues_MPISBAIJ,
1530   0,
1531   MatPrintHelp_MPISBAIJ,
1532   MatScale_MPISBAIJ,
1533   0,
1534   0,
1535   0,
1536   MatGetBlockSize_MPISBAIJ,
1537   0,
1538   0,
1539   0,
1540   0,
1541   0,
1542   0,
1543   MatSetUnfactored_MPISBAIJ,
1544   0,
1545   MatSetValuesBlocked_MPISBAIJ,
1546   0,
1547   0,
1548   0,
1549   MatGetPetscMaps_Petsc,
1550   0,
1551   0,
1552   0,
1553   0,
1554   0,
1555   0,
1556   MatGetRowMax_MPISBAIJ};
1557 
1558 
1559 EXTERN_C_BEGIN
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1562 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1563 {
1564   PetscFunctionBegin;
1565   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1566   *iscopy = PETSC_FALSE;
1567   PetscFunctionReturn(0);
1568 }
1569 EXTERN_C_END
1570 
1571 EXTERN_C_BEGIN
1572 #undef __FUNCT__
1573 #define __FUNCT__ "MatCreate_MPISBAIJ"
1574 int MatCreate_MPISBAIJ(Mat B)
1575 {
1576   Mat_MPISBAIJ *b;
1577   int          ierr;
1578   PetscTruth   flg;
1579 
1580   PetscFunctionBegin;
1581 
1582   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1583   B->data = (void*)b;
1584   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1585   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1586 
1587   B->ops->destroy    = MatDestroy_MPISBAIJ;
1588   B->ops->view       = MatView_MPISBAIJ;
1589   B->mapping    = 0;
1590   B->factor     = 0;
1591   B->assembled  = PETSC_FALSE;
1592 
1593   B->insertmode = NOT_SET_VALUES;
1594   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1595   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1596 
1597   /* build local table of row and column ownerships */
1598   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1599   b->cowners    = b->rowners + b->size + 2;
1600   b->rowners_bs = b->cowners + b->size + 2;
1601   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1602 
1603   /* build cache for off array entries formed */
1604   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1605   b->donotstash  = PETSC_FALSE;
1606   b->colmap      = PETSC_NULL;
1607   b->garray      = PETSC_NULL;
1608   b->roworiented = PETSC_TRUE;
1609 
1610 #if defined(PETSC_USE_MAT_SINGLE)
1611   /* stuff for MatSetValues_XXX in single precision */
1612   b->setvalueslen     = 0;
1613   b->setvaluescopy    = PETSC_NULL;
1614 #endif
1615 
1616   /* stuff used in block assembly */
1617   b->barray       = 0;
1618 
1619   /* stuff used for matrix vector multiply */
1620   b->lvec         = 0;
1621   b->Mvctx        = 0;
1622   b->slvec0       = 0;
1623   b->slvec0b      = 0;
1624   b->slvec1       = 0;
1625   b->slvec1a      = 0;
1626   b->slvec1b      = 0;
1627   b->sMvctx       = 0;
1628 
1629   /* stuff for MatGetRow() */
1630   b->rowindices   = 0;
1631   b->rowvalues    = 0;
1632   b->getrowactive = PETSC_FALSE;
1633 
1634   /* hash table stuff */
1635   b->ht           = 0;
1636   b->hd           = 0;
1637   b->ht_size      = 0;
1638   b->ht_flag      = PETSC_FALSE;
1639   b->ht_fact      = 0;
1640   b->ht_total_ct  = 0;
1641   b->ht_insert_ct = 0;
1642 
1643   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1644   if (flg) {
1645     PetscReal fact = 1.39;
1646     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1647     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1648     if (fact <= 1.0) fact = 1.39;
1649     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1650     PetscLogInfo(0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact);
1651   }
1652   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1653                                      "MatStoreValues_MPISBAIJ",
1654                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1655   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1656                                      "MatRetrieveValues_MPISBAIJ",
1657                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1658   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1659                                      "MatGetDiagonalBlock_MPISBAIJ",
1660                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1661   PetscFunctionReturn(0);
1662 }
1663 EXTERN_C_END
1664 
1665 #undef __FUNCT__
1666 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1667 /*@C
1668    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1669    the user should preallocate the matrix storage by setting the parameters
1670    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1671    performance can be increased by more than a factor of 50.
1672 
1673    Collective on Mat
1674 
1675    Input Parameters:
1676 +  A - the matrix
1677 .  bs   - size of block
1678 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1679            submatrix  (same for all local rows)
1680 .  d_nnz - array containing the number of block nonzeros in the various block rows
1681            in the upper triangular and diagonal part of the diagonal portion of the local
1682            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1683            for the diagonal entry even if it is zero.
1684 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1685            submatrix (same for all local rows).
1686 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1687            off-diagonal portion of the local submatrix (possibly different for
1688            each block row) or PETSC_NULL.
1689 
1690 
1691    Options Database Keys:
1692 .   -mat_no_unroll - uses code that does not unroll the loops in the
1693                      block calculations (much slower)
1694 .   -mat_block_size - size of the blocks to use
1695 
1696    Notes:
1697 
1698    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1699    then it must be used on all processors that share the object for that argument.
1700 
1701    Storage Information:
1702    For a square global matrix we define each processor's diagonal portion
1703    to be its local rows and the corresponding columns (a square submatrix);
1704    each processor's off-diagonal portion encompasses the remainder of the
1705    local matrix (a rectangular submatrix).
1706 
1707    The user can specify preallocated storage for the diagonal part of
1708    the local submatrix with either d_nz or d_nnz (not both).  Set
1709    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1710    memory allocation.  Likewise, specify preallocated storage for the
1711    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1712 
1713    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1714    the figure below we depict these three local rows and all columns (0-11).
1715 
1716 .vb
1717            0 1 2 3 4 5 6 7 8 9 10 11
1718           -------------------
1719    row 3  |  o o o d d d o o o o o o
1720    row 4  |  o o o d d d o o o o o o
1721    row 5  |  o o o d d d o o o o o o
1722           -------------------
1723 .ve
1724 
1725    Thus, any entries in the d locations are stored in the d (diagonal)
1726    submatrix, and any entries in the o locations are stored in the
1727    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1728    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1729    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1730    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1731    plus the diagonal part of the d matrix,
1732    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1733    In general, for PDE problems in which most nonzeros are near the diagonal,
1734    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1735    or you will get TERRIBLE performance; see the users' manual chapter on
1736    matrices.
1737 
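   Example:
   A minimal, illustrative sketch of preallocating and assembling an MPISBAIJ
   matrix; the block size, local size, and the per-block-row counts in d_nnz
   and o_nnz below are hypothetical, and comm and ierr are assumed to be
   declared in the usual way.

.vb
      Mat A;
      int bs = 2, m = 6;            /* 3 block rows owned by this process  */
      int d_nnz[3] = {3,2,1};       /* upper triangle incl. diagonal block */
      int o_nnz[3] = {1,1,0};       /* off-diagonal (rectangular) part     */

      ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
      /* ... MatSetValuesBlocked() for the locally owned block rows ... */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve

   Supplying d_nnz/o_nnz per block row (instead of the scalar d_nz/o_nz) is
   useful when the fill varies from row to row, since it avoids mallocs
   during assembly.
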
1738    Level: intermediate
1739 
1740 .keywords: matrix, block, aij, compressed row, sparse, parallel
1741 
1742 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1743 @*/
1744 
1745 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1746 {
1747   Mat_MPISBAIJ *b;
1748   int          ierr,i,mbs,Mbs;
1749   PetscTruth   flg2;
1750 
1751   PetscFunctionBegin;
1752   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr);
1753   if (!flg2) PetscFunctionReturn(0);
1754 
1755   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1756 
1757   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1758   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1759   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1760   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1761   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1762   if (d_nnz) {
1763     for (i=0; i<B->m/bs; i++) {
1764       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1765     }
1766   }
1767   if (o_nnz) {
1768     for (i=0; i<B->m/bs; i++) {
1769       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1770       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1771   }
1772   B->preallocated = PETSC_TRUE;
1773   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1774   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1775   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1776   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1777 
1778   b   = (Mat_MPISBAIJ*)B->data;
1779   mbs = B->m/bs;
1780   Mbs = B->M/bs;
1781   if (mbs*bs != B->m) {
1782     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by blocksize %d",B->m,bs);
1783   }
1784 
1785   b->bs  = bs;
1786   b->bs2 = bs*bs;
1787   b->mbs = mbs;
1788   b->nbs = mbs;
1789   b->Mbs = Mbs;
1790   b->Nbs = Mbs;
1791 
1792   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1793   b->rowners[0]    = 0;
1794   for (i=2; i<=b->size; i++) {
1795     b->rowners[i] += b->rowners[i-1];
1796   }
1797   b->rstart    = b->rowners[b->rank];
1798   b->rend      = b->rowners[b->rank+1];
1799   b->cstart    = b->rstart;
1800   b->cend      = b->rend;
1801   for (i=0; i<=b->size; i++) {
1802     b->rowners_bs[i] = b->rowners[i]*bs;
1803   }
1804   b->rstart_bs = b->rstart*bs;
1805   b->rend_bs   = b->rend*bs;
1806 
1807   b->cstart_bs = b->cstart*bs;
1808   b->cend_bs   = b->cend*bs;
1809 
1810 
1811   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1812   PetscLogObjectParent(B,b->A);
1813   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1814   PetscLogObjectParent(B,b->B);
1815 
1816   /* build cache for off array entries formed */
1817   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1818 
1819   PetscFunctionReturn(0);
1820 }
1821 
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatCreateMPISBAIJ"
1824 /*@C
1825    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1826    (block compressed row).  For good matrix assembly performance
1827    the user should preallocate the matrix storage by setting the parameters
1828    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1829    performance can be increased by more than a factor of 50.
1830 
1831    Collective on MPI_Comm
1832 
1833    Input Parameters:
1834 +  comm - MPI communicator
1835 .  bs   - size of block
1836 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1837            This value should be the same as the local size used in creating the
1838            y vector for the matrix-vector product y = Ax.
1839 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1840            This value should be the same as the local size used in creating the
1841            x vector for the matrix-vector product y = Ax.
1842 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1843 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1844 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1845            submatrix  (same for all local rows)
1846 .  d_nnz - array containing the number of block nonzeros in the various block rows
1847            in the upper triangular and diagonal part of the diagonal portion of the local
1848            submatrix (possibly different for each block row) or PETSC_NULL.
1849            You must leave room for the diagonal entry even if it is zero.
1850 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1851            submatrix (same for all local rows).
1852 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1853            off-diagonal portion of the local submatrix (possibly different for
1854            each block row) or PETSC_NULL.
1855 
1856    Output Parameter:
1857 .  A - the matrix
1858 
1859    Options Database Keys:
1860 .   -mat_no_unroll - uses code that does not unroll the loops in the
1861                      block calculations (much slower)
1862 .   -mat_block_size - size of the blocks to use
1863 .   -mat_mpi - use the parallel matrix data structures even on one processor
1864                (defaults to using SeqSBAIJ format on one processor)
1865 
1866    Notes:
1867    The user MUST specify either the local or global matrix dimensions
1868    (possibly both).
1869 
1870    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1871    then it must be used on all processors that share the object for that argument.
1872 
1873    Storage Information:
1874    For a square global matrix we define each processor's diagonal portion
1875    to be its local rows and the corresponding columns (a square submatrix);
1876    each processor's off-diagonal portion encompasses the remainder of the
1877    local matrix (a rectangular submatrix).
1878 
1879    The user can specify preallocated storage for the diagonal part of
1880    the local submatrix with either d_nz or d_nnz (not both).  Set
1881    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1882    memory allocation.  Likewise, specify preallocated storage for the
1883    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1884 
1885    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1886    the figure below we depict these three local rows and all columns (0-11).
1887 
1888 .vb
1889            0 1 2 3 4 5 6 7 8 9 10 11
1890           -------------------
1891    row 3  |  o o o d d d o o o o o o
1892    row 4  |  o o o d d d o o o o o o
1893    row 5  |  o o o d d d o o o o o o
1894           -------------------
1895 .ve
1896 
1897    Thus, any entries in the d locations are stored in the d (diagonal)
1898    submatrix, and any entries in the o locations are stored in the
1899    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1900    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1901 
1902    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1903    plus the diagonal part of the d matrix,
1904    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1905    In general, for PDE problems in which most nonzeros are near the diagonal,
1906    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1907    or you will get TERRIBLE performance; see the users' manual chapter on
1908    matrices.
1909 
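   Example:
   An illustrative sketch (the global size, block size, and the constant
   per-block-row preallocation figures below are hypothetical, and ierr is
   assumed to be declared as usual):

.vb
      Mat A;
      ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                               3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
      /* ... MatSetValuesBlocked() for the locally owned block rows ... */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve

   On a single process this routine creates a MATSEQSBAIJ matrix instead, so
   the same code runs unchanged in serial and in parallel.
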
1910    Level: intermediate
1911 
1912 .keywords: matrix, block, aij, compressed row, sparse, parallel
1913 
1914 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1915 @*/
1916 
1917 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1918 {
1919   int ierr,size;
1920 
1921   PetscFunctionBegin;
1922   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1923   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1924   if (size > 1) {
1925     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1926     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1927   } else {
1928     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1929     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1930   }
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1937 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1938 {
1939   Mat          mat;
1940   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1941   int          ierr,len=0;
1942 
1943   PetscFunctionBegin;
1944   *newmat       = 0;
1945   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1946   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
1947   mat->preallocated = PETSC_TRUE;
1948   a = (Mat_MPISBAIJ*)mat->data;
1949   a->bs  = oldmat->bs;
1950   a->bs2 = oldmat->bs2;
1951   a->mbs = oldmat->mbs;
1952   a->nbs = oldmat->nbs;
1953   a->Mbs = oldmat->Mbs;
1954   a->Nbs = oldmat->Nbs;
1955 
1956   a->rstart       = oldmat->rstart;
1957   a->rend         = oldmat->rend;
1958   a->cstart       = oldmat->cstart;
1959   a->cend         = oldmat->cend;
1960   a->size         = oldmat->size;
1961   a->rank         = oldmat->rank;
1962   a->donotstash   = oldmat->donotstash;
1963   a->roworiented  = oldmat->roworiented;
1964   a->rowindices   = 0;
1965   a->rowvalues    = 0;
1966   a->getrowactive = PETSC_FALSE;
1967   a->barray       = 0;
1968   a->rstart_bs    = oldmat->rstart_bs;
1969   a->rend_bs      = oldmat->rend_bs;
1970   a->cstart_bs    = oldmat->cstart_bs;
1971   a->cend_bs      = oldmat->cend_bs;
1972 
1973   /* hash table stuff */
1974   a->ht           = 0;
1975   a->hd           = 0;
1976   a->ht_size      = 0;
1977   a->ht_flag      = oldmat->ht_flag;
1978   a->ht_fact      = oldmat->ht_fact;
1979   a->ht_total_ct  = 0;
1980   a->ht_insert_ct = 0;
1981 
1982   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1983   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1984   a->cowners    = a->rowners + a->size + 2;
1985   a->rowners_bs = a->cowners + a->size + 2;
1986   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1987   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1988   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
1989   if (oldmat->colmap) {
1990 #if defined (PETSC_USE_CTABLE)
1991     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1992 #else
1993     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1994     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
1995     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
1996 #endif
1997   } else a->colmap = 0;
1998   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1999     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2000     PetscLogObjectMemory(mat,len*sizeof(int));
2001     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2002   } else a->garray = 0;
2003 
2004   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2005   PetscLogObjectParent(mat,a->lvec);
2006   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2007 
2008   PetscLogObjectParent(mat,a->Mvctx);
2009   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2010   PetscLogObjectParent(mat,a->A);
2011   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2012   PetscLogObjectParent(mat,a->B);
2013   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2014   *newmat = mat;
2015   PetscFunctionReturn(0);
2016 }
2017 
2018 #include "petscsys.h"
2019 
2020 EXTERN_C_BEGIN
2021 #undef __FUNCT__
2022 #define __FUNCT__ "MatLoad_MPISBAIJ"
2023 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
2024 {
2025   Mat          A;
2026   int          i,nz,ierr,j,rstart,rend,fd;
2027   PetscScalar  *vals,*buf;
2028   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2029   MPI_Status   status;
2030   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2031   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2032   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2033   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2034   int          dcount,kmax,k,nzcount,tmp;
2035 #if defined(PETSC_HAVE_SPOOLES)
2036   PetscTruth   flag;
2037 #endif
2038 
2039   PetscFunctionBegin;
2040   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2041 
2042   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2043   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2044   if (!rank) {
2045     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2046     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2047     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2048     if (header[3] < 0) {
2049       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2050     }
2051   }
2052 
2053   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2054   M = header[1]; N = header[2];
2055 
2056   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2057 
2058   /*
2059      This code adds extra rows to make sure the number of rows is
2060      divisible by the blocksize
2061   */
2062   Mbs        = M/bs;
2063   extra_rows = bs - M + bs*(Mbs);
2064   if (extra_rows == bs) extra_rows = 0;
2065   else                  Mbs++;
2066   if (extra_rows &&!rank) {
2067     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2068   }
2069 
2070   /* determine ownership of all rows */
2071   mbs        = Mbs/size + ((Mbs % size) > rank);
2072   m          = mbs*bs;
2073   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2074   browners   = rowners + size + 1;
2075   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2076   rowners[0] = 0;
2077   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2078   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2079   rstart = rowners[rank];
2080   rend   = rowners[rank+1];
2081 
2082   /* distribute row lengths to all processors */
2083   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2084   if (!rank) {
2085     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2086     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2087     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2088     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2089     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2090     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2091     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2092   } else {
2093     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2094   }
2095 
2096   if (!rank) {   /* procs[0] */
2097     /* calculate the number of nonzeros on each processor */
2098     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2099     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2100     for (i=0; i<size; i++) {
2101       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2102         procsnz[i] += rowlengths[j];
2103       }
2104     }
2105     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2106 
2107     /* determine max buffer needed and allocate it */
2108     maxnz = 0;
2109     for (i=0; i<size; i++) {
2110       maxnz = PetscMax(maxnz,procsnz[i]);
2111     }
2112     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2113 
2114     /* read in my part of the matrix column indices  */
2115     nz     = procsnz[0];
2116     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2117     mycols = ibuf;
2118     if (size == 1)  nz -= extra_rows;
2119     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2120     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2121 
2122     /* read in every ones (except the last) and ship off */
2123     /* read in the column indices for the other processors (except the last one) and ship them off */
2124       nz   = procsnz[i];
2125       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2126       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2127     }
2128     /* read in the stuff for the last proc */
2129     if (size != 1) {
2130       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2131       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2132       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2133       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2134     }
2135     ierr = PetscFree(cols);CHKERRQ(ierr);
2136   } else {  /* procs[i], i>0 */
2137     /* determine buffer space needed for message */
2138     nz = 0;
2139     for (i=0; i<m; i++) {
2140       nz += locrowlens[i];
2141     }
2142     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2143     mycols = ibuf;
2144     /* receive message of column indices*/
2145     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2146     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2147     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2148   }
2149 
2150   /* loop over local rows, determining number of off diagonal entries */
2151   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2152   odlens   = dlens + (rend-rstart);
2153   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2154   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2155   masked1  = mask    + Mbs;
2156   masked2  = masked1 + Mbs;
2157   rowcount = 0; nzcount = 0;
2158   for (i=0; i<mbs; i++) {
2159     dcount  = 0;
2160     odcount = 0;
2161     for (j=0; j<bs; j++) {
2162       kmax = locrowlens[rowcount];
2163       for (k=0; k<kmax; k++) {
2164         tmp = mycols[nzcount++]/bs; /* block col. index */
2165         if (!mask[tmp]) {
2166           mask[tmp] = 1;
2167           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2168           else masked1[dcount++] = tmp; /* entry in diag portion */
2169         }
2170       }
2171       rowcount++;
2172     }
2173 
2174     dlens[i]  = dcount;  /* d_nzz[i] */
2175     odlens[i] = odcount; /* o_nzz[i] */
2176 
2177     /* zero out the mask elements we set */
2178     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2179     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2180   }
2181 
2182   /* create our matrix */
2183   ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat);
2184   CHKERRQ(ierr);
2185   A = *newmat;
2186   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2187 
2188   if (!rank) {
2189     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2190     /* read in my part of the matrix numerical values  */
2191     nz = procsnz[0];
2192     vals = buf;
2193     mycols = ibuf;
2194     if (size == 1)  nz -= extra_rows;
2195     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2196     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2197 
2198     /* insert into matrix */
2199     jj      = rstart*bs;
2200     for (i=0; i<m; i++) {
2201       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2202       mycols += locrowlens[i];
2203       vals   += locrowlens[i];
2204       jj++;
2205     }
2206 
2207     /* read in the values for the other processors (except the last one) and ship them out */
2208     for (i=1; i<size-1; i++) {
2209       nz   = procsnz[i];
2210       vals = buf;
2211       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2212       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2213     }
2214     /* the last proc */
2215     if (size != 1){
2216       nz   = procsnz[i] - extra_rows;
2217       vals = buf;
2218       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2219       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2220       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2221     }
2222     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2223 
2224   } else {
2225     /* receive numeric values */
2226     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2227 
2228     /* receive message of values*/
2229     vals   = buf;
2230     mycols = ibuf;
2231     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2232     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2233     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2234 
2235     /* insert into matrix */
2236     jj      = rstart*bs;
2237     for (i=0; i<m; i++) {
2238       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2239       mycols += locrowlens[i];
2240       vals   += locrowlens[i];
2241       jj++;
2242     }
2243   }
2244 
2245   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2246   ierr = PetscFree(buf);CHKERRQ(ierr);
2247   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2248   ierr = PetscFree(rowners);CHKERRQ(ierr);
2249   ierr = PetscFree(dlens);CHKERRQ(ierr);
2250   ierr = PetscFree(mask);CHKERRQ(ierr);
2251   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2252   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2253 #if defined(PETSC_HAVE_SPOOLES)
2254   ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
2255   if (flag) {
2256     if (size == 1) {
2257       ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
2258     } else {
2259       ierr = MatUseSpooles_MPISBAIJ(A);CHKERRQ(ierr);
2260     }
2261   }
2262 #endif
2263   PetscFunctionReturn(0);
2264 }
2265 EXTERN_C_END
2266 
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2269 /*@
2270    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2271 
2272    Input Parameters:
2273 +  mat  - the matrix
2274 -  fact - factor
2275 
2276    Collective on Mat
2277 
2278    Level: advanced
2279 
2280   Notes:
2281    This can also be set by the command line option: -mat_use_hash_table fact
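
   For example, a hypothetical application executable ex1 could select a hash
   table size factor of 1.5 at run time with
.vb
      mpirun -np 4 ./ex1 -mat_use_hash_table 1.5
.ve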
2282 
2283 .keywords: matrix, hashtable, factor, HT
2284 
2285 .seealso: MatSetOption()
2286 @*/
2287 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2288 {
2289   PetscFunctionBegin;
2290   SETERRQ(1,"Function not yet written for SBAIJ format");
2291   /* PetscFunctionReturn(0); */
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2296 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2297 {
2298   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2299   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2300   PetscReal    atmp;
2301   PetscReal    *work,*svalues,*rvalues;
2302   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2303   int          rank,size,*rowners_bs,dest,count,source;
2304   PetscScalar  *va;
2305   MatScalar    *ba;
2306   MPI_Status   stat;
2307 
2308   PetscFunctionBegin;
2309   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2310   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2311 
2312   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2313   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2314 
2315   bs   = a->bs;
2316   mbs  = a->mbs;
2317   Mbs  = a->Mbs;
2318   ba   = b->a;
2319   bi   = b->i;
2320   bj   = b->j;
2321   /*
2322   PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2323   PetscSynchronizedFlush(A->comm);
2324   */
2325 
2326   /* find ownerships */
2327   rowners_bs = a->rowners_bs;
2328   /*
2329   if (!rank){
2330     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2331   }
2332   */
2333 
2334   /* each proc creates an array to be distributed */
2335   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2336   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2337 
2338   /* row_max for B */
2339   if (rank != size-1){
2340     for (i=0; i<mbs; i++) {
2341       ncols = bi[1] - bi[0]; bi++;
2342       brow  = bs*i;
2343       for (j=0; j<ncols; j++){
2344         bcol = bs*(*bj);
2345         for (kcol=0; kcol<bs; kcol++){
2346           col = bcol + kcol;                 /* local col index */
2347           col += rowners_bs[rank+1];      /* global col index */
2348           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2349           for (krow=0; krow<bs; krow++){
2350             atmp = PetscAbsScalar(*ba); ba++;
2351             row = brow + krow;    /* local row index */
2352             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2353             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2354             if (work[col] < atmp) work[col] = atmp;
2355           }
2356         }
2357         bj++;
2358       }
2359     }
2360     /*
2361       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2362       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2363       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2364       */
2365 
2366     /* send values to its owners */
2367     for (dest=rank+1; dest<size; dest++){
2368       svalues = work + rowners_bs[dest];
2369       count   = rowners_bs[dest+1]-rowners_bs[dest];
2370       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2371       /*
2372       PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2373       PetscSynchronizedFlush(A->comm);
2374       */
2375     }
2376   }
2377 
2378   /* receive values */
2379   if (rank){
2380     rvalues = work;
2381     count   = rowners_bs[rank+1]-rowners_bs[rank];
2382     for (source=0; source<rank; source++){
2383       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2384       /* process values */
2385       for (i=0; i<count; i++){
2386         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2387       }
2388       /*
2389       PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2390       PetscSynchronizedFlush(A->comm);
2391       */
2392     }
2393   }
2394 
2395   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2396   ierr = PetscFree(work);CHKERRQ(ierr);
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatRelax_MPISBAIJ"
2402 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2403 {
2404   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2405   int            ierr,mbs=mat->mbs,bs=mat->bs;
2406   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2407   Vec            bb1;
2408 
2409   PetscFunctionBegin;
2410   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2411   if (bs > 1)
2412     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2413 
2414   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2415     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2416       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2417       its--;
2418     }
2419 
2420     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2421     while (its--){
2422 
2423       /* lower triangular part: slvec0b = B^T*xx (all of slvec0 is negated below) */
2424       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2425 
2426       /* copy xx into slvec0a */
2427       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2428       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2429       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2430       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2431 
2432       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2433 
2434       /* copy bb into slvec1a */
2435       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2436       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2437       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2438       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2439 
2440       /* set slvec1b = 0 */
2441       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2442 
2443       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2444       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2445       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2446       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2447 
2448       /* upper triangular part: bb1 = bb1 - B*x */
2449       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2450 
2451       /* local diagonal sweep */
2452       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2453     }
2454     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2455   } else {
2456     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2457   }
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 #undef __FUNCT__
2462 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2463 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2464 {
2465   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2466   int            ierr;
2467   PetscScalar    mone=-1.0;
2468   Vec            lvec1,bb1;
2469 
2470   PetscFunctionBegin;
2471   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2472   if (mat->bs > 1)
2473     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2474 
2475   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2476     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2477       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2478       its--;
2479     }
2480 
2481     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2482     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2483     while (its--){
2484       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2485 
2486       /* lower diagonal part: bb1 = bb - B^T*xx */
2487       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2488       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2489 
2490       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2491       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2492       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2493 
2494       /* upper diagonal part: bb1 = bb1 - B*x */
2495       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2496       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2497 
2498       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2499 
2500       /* diagonal sweep */
2501       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2502     }
2503     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2504     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2505   } else {
2506     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2507   }
2508   PetscFunctionReturn(0);
2509 }
2510 
2511