/*$Id: mpisbaij.c,v 1.30 2000/10/17 16:59:32 hzhang Exp bsmith $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int DisAssemble_MPISBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqSBAIJ(Mat);
18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*);
19 extern int MatZeroRows_SeqBAIJ(Mat,IS,Scalar *);
20 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
21 
/*  UGLY, ugly, ugly
   When MatScalar == Scalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries to single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
29 #if defined(PETSC_USE_MAT_SINGLE)
30 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
32 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
33 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
35 #else
36 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
37 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
38 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
39 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
40 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
41 #endif
42 
43 EXTERN_C_BEGIN
44 #undef __FUNC__
45 #define __FUNC__ /*<a name="MatStoreValues_MPISBAIJ"></a>*/"MatStoreValues_MPISBAIJ"
46 int MatStoreValues_MPISBAIJ(Mat mat)
47 {
48   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
49   int         ierr;
50 
51   PetscFunctionBegin;
52   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
53   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
54   PetscFunctionReturn(0);
55 }
56 EXTERN_C_END
57 
58 EXTERN_C_BEGIN
59 #undef __FUNC__
60 #define __FUNC__ /*<a name="MatRetrieveValues_MPISBAIJ"></a>*/"MatRetrieveValues_MPISBAIJ"
61 int MatRetrieveValues_MPISBAIJ(Mat mat)
62 {
63   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
64   int         ierr;
65 
66   PetscFunctionBegin;
67   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
68   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
69   PetscFunctionReturn(0);
70 }
71 EXTERN_C_END
72 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
79 #undef __FUNC__
80 #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private"
81 static int CreateColmap_MPISBAIJ_Private(Mat mat)
82 {
83   PetscFunctionBegin;
84   SETERRQ(1,"Function not yet written for SBAIJ format");
85   /* PetscFunctionReturn(0); */
86 }
87 
88 #define CHUNKSIZE  10
89 
90 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
91 { \
92  \
93     brow = row/bs;  \
94     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
95     rmax = aimax[brow]; nrow = ailen[brow]; \
96       bcol = col/bs; \
97       ridx = row % bs; cidx = col % bs; \
98       low = 0; high = nrow; \
99       while (high-low > 3) { \
100         t = (low+high)/2; \
101         if (rp[t] > bcol) high = t; \
102         else              low  = t; \
103       } \
104       for (_i=low; _i<high; _i++) { \
105         if (rp[_i] > bcol) break; \
106         if (rp[_i] == bcol) { \
107           bap  = ap +  bs2*_i + bs*cidx + ridx; \
108           if (addv == ADD_VALUES) *bap += value;  \
109           else                    *bap  = value;  \
110           goto a_noinsert; \
111         } \
112       } \
113       if (a->nonew == 1) goto a_noinsert; \
114       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
115       if (nrow >= rmax) { \
116         /* there is no extra room in row, therefore enlarge */ \
117         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
118         MatScalar *new_a; \
119  \
120         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
121  \
122         /* malloc new storage space */ \
123         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
124         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
125         new_j   = (int*)(new_a + bs2*new_nz); \
126         new_i   = new_j + new_nz; \
127  \
128         /* copy over old data into new slots */ \
129         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
130         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
131         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
132         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
133         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
134         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
136         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
137                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
138         /* free up old matrix storage */ \
139         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
140         if (!a->singlemalloc) { \
141           ierr = PetscFree(a->i);CHKERRQ(ierr); \
142           ierr = PetscFree(a->j);CHKERRQ(ierr);\
143         } \
144         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
145         a->singlemalloc = PETSC_TRUE; \
146  \
147         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
148         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
149         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
150         a->s_maxnz += bs2*CHUNKSIZE; \
151         a->reallocs++; \
152         a->s_nz++; \
153       } \
154       N = nrow++ - 1;  \
155       /* shift up all the later entries in this row */ \
156       for (ii=N; ii>=_i; ii--) { \
157         rp[ii+1] = rp[ii]; \
158         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
159       } \
160       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
161       rp[_i]                      = bcol;  \
162       ap[bs2*_i + bs*cidx + ridx] = value;  \
163       a_noinsert:; \
164     ailen[brow] = nrow; \
165 }
#ifndef MatSetValues_SeqSBAIJ_B_Private
167 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
168 { \
169     brow = row/bs;  \
170     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
171     rmax = bimax[brow]; nrow = bilen[brow]; \
172       bcol = col/bs; \
173       ridx = row % bs; cidx = col % bs; \
174       low = 0; high = nrow; \
175       while (high-low > 3) { \
176         t = (low+high)/2; \
177         if (rp[t] > bcol) high = t; \
178         else              low  = t; \
179       } \
180       for (_i=low; _i<high; _i++) { \
181         if (rp[_i] > bcol) break; \
182         if (rp[_i] == bcol) { \
183           bap  = ap +  bs2*_i + bs*cidx + ridx; \
184           if (addv == ADD_VALUES) *bap += value;  \
185           else                    *bap  = value;  \
186           goto b_noinsert; \
187         } \
188       } \
189       if (b->nonew == 1) goto b_noinsert; \
190       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
191       if (nrow >= rmax) { \
192         /* there is no extra room in row, therefore enlarge */ \
193         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
194         MatScalar *new_a; \
195  \
196         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
197  \
198         /* malloc new storage space */ \
199         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
200         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
201         new_j   = (int*)(new_a + bs2*new_nz); \
202         new_i   = new_j + new_nz; \
203  \
204         /* copy over old data into new slots */ \
205         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
206         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
207         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
208         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
209         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
210         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
211         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
212         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
213                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
214         /* free up old matrix storage */ \
215         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
216         if (!b->singlemalloc) { \
217           ierr = PetscFree(b->i);CHKERRQ(ierr); \
218           ierr = PetscFree(b->j);CHKERRQ(ierr); \
219         } \
220         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
221         b->singlemalloc = PETSC_TRUE; \
222  \
223         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
224         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
225         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
226         b->maxnz += bs2*CHUNKSIZE; \
227         b->reallocs++; \
228         b->nz++; \
229       } \
230       N = nrow++ - 1;  \
231       /* shift up all the later entries in this row */ \
232       for (ii=N; ii>=_i; ii--) { \
233         rp[ii+1] = rp[ii]; \
234         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
235       } \
236       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
237       rp[_i]                      = bcol;  \
238       ap[bs2*_i + bs*cidx + ridx] = value;  \
239       b_noinsert:; \
240     bilen[brow] = nrow; \
241 }
242 #endif
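
/*
   Worked example of the addressing used in both macros above (illustrative
   numbers only): the scalar entry (row,col) is located through its block,
        brow = row/bs, bcol = col/bs, ridx = row%bs, cidx = col%bs,
   and, once the block index _i with rp[_i] == bcol is found, through the
   column-major offset inside the bs x bs block,
        ap[bs2*_i + bs*cidx + ridx].
   For bs = 2 and (row,col) = (5,8): brow = 2, bcol = 4, ridx = 1, cidx = 0,
   so the value lands at ap[4*_i + 2*0 + 1] = ap[4*_i + 1].
*/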
243 
244 #if defined(PETSC_USE_MAT_SINGLE)
245 #undef __FUNC__
246 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ"
247 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
248 {
249   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
250   int         ierr,i,N = m*n;
251   MatScalar   *vsingle;
252 
253   PetscFunctionBegin;
254   if (N > b->setvalueslen) {
255     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
256     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
257     b->setvalueslen  = N;
258   }
259   vsingle = b->setvaluescopy;
260 
261   for (i=0; i<N; i++) {
262     vsingle[i] = v[i];
263   }
264   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
265   PetscFunctionReturn(0);
266 }
267 
268 #undef __FUNC__
269 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
270 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
271 {
  Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
273   int         ierr,i,N = m*n*b->bs2;
274   MatScalar   *vsingle;
275 
276   PetscFunctionBegin;
277   if (N > b->setvalueslen) {
278     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
279     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
280     b->setvalueslen  = N;
281   }
282   vsingle = b->setvaluescopy;
283   for (i=0; i<N; i++) {
284     vsingle[i] = v[i];
285   }
286   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
287   PetscFunctionReturn(0);
288 }
289 
290 #undef __FUNC__
291 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT"
int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
302 
303 #undef __FUNC__
304 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT"
int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
315 #endif
316 
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
320 #undef __FUNC__
#define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_MatScalar"></a>*/"MatSetValues_MPISBAIJ_MatScalar"
322 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
323 {
324   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
325   MatScalar    value;
326   PetscTruth   roworiented = baij->roworiented;
327   int          ierr,i,j,row,col;
328   int          rstart_orig=baij->rstart_bs;
329   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
330   int          cend_orig=baij->cend_bs,bs=baij->bs;
331 
332   /* Some Variables required in the macro */
333   Mat          A = baij->A;
334   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
335   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
336   MatScalar    *aa=a->a;
337 
338   Mat          B = baij->B;
339   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
340   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
341   MatScalar    *ba=b->a;
342 
343   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
344   int          low,high,t,ridx,cidx,bs2=a->bs2;
345   MatScalar    *ap,*bap;
346 
347   /* for stash */
348   int          n_loc, *in_loc=0;
349   MatScalar    *v_loc=0;
350 
351   PetscFunctionBegin;
352 
353   if(!baij->donotstash){
354     in_loc = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(in_loc);
355     v_loc  = (MatScalar*)PetscMalloc(n*sizeof(MatScalar));CHKPTRQ(v_loc);
356   }
357 
358   for (i=0; i<m; i++) {
359     if (im[i] < 0) continue;
360 #if defined(PETSC_USE_BOPT_g)
361     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
362 #endif
363     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
364       row = im[i] - rstart_orig;              /* local row index */
365       for (j=0; j<n; j++) {
366         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
367         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
368           col = in[j] - cstart_orig;          /* local col index */
369           brow = row/bs; bcol = col/bs;
370           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
371           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
372           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
373           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
374         } else if (in[j] < 0) continue;
375 #if defined(PETSC_USE_BOPT_g)
376         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");}
377 #endif
378         else {  /* off-diag entry (B) */
379           if (mat->was_assembled) {
380             if (!baij->colmap) {
381               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
382             }
383 #if defined (PETSC_USE_CTABLE)
384             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
385             col  = col - 1 + in[j]%bs;
386 #else
387             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
388 #endif
389             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
390               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
391               col =  in[j];
392               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
393               B = baij->B;
394               b = (Mat_SeqBAIJ*)(B)->data;
395               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
396               ba=b->a;
397             }
398           } else col = in[j];
399           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
400           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
401           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
402         }
403       }
404     } else {  /* off processor entry */
405       if (!baij->donotstash) {
406         n_loc = 0;
407         for (j=0; j<n; j++){
408           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
409           in_loc[n_loc] = in[j];
410           if (roworiented) {
411             v_loc[n_loc] = v[i*n+j];
412           } else {
413             v_loc[n_loc] = v[j*m+i];
414           }
415           n_loc++;
416         }
417         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
418       }
419     }
420   }
421 
422   if(!baij->donotstash){
423     ierr = PetscFree(in_loc);CHKERRQ(ierr);
424     ierr = PetscFree(v_loc);CHKERRQ(ierr);
425   }
426   PetscFunctionReturn(0);
427 }
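
/*
   Caller-side sketch (illustrative only, not compiled; assumes mat is a
   MATMPISBAIJ matrix ready for insertion): because only blocks with
   block-row <= block-column are stored, the mirrored lower triangular
   insertion below is silently dropped by the routine above.
*/
#if 0
  {
    int    row = 3,col = 7,ierr;
    Scalar v   = 5.0;
    ierr = MatSetValues(mat,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr); /* kept: upper triangle */
    ierr = MatSetValues(mat,1,&col,1,&row,&v,INSERT_VALUES);CHKERRQ(ierr); /* ignored: lower triangle */
  }
#endif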
428 
429 #undef __FUNC__
#define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_MatScalar"></a>*/"MatSetValuesBlocked_MPISBAIJ_MatScalar"
431 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
432 {
433   PetscFunctionBegin;
434   SETERRQ(1,"Function not yet written for SBAIJ format");
435   /* PetscFunctionReturn(0); */
436 }
437 
438 #define HASH_KEY 0.6180339887
439 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
440 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
441 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
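/*
   The multiplicative hash above uses the fractional part of the golden
   ratio: HASH(size,key,tmp) evaluates to floor(size * frac(key*0.6180339887)).
   Worked numbers (illustrative only): size = 100, key = 7 gives
   tmp = 4.3262..., frac(tmp) = 0.3262..., so the key hashes to slot 32.
*/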
442 #undef __FUNC__
443 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar"
444 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
445 {
446   PetscFunctionBegin;
447   SETERRQ(1,"Function not yet written for SBAIJ format");
448   /* PetscFunctionReturn(0); */
449 }
450 
451 #undef __FUNC__
452 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
453 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
454 {
455   PetscFunctionBegin;
456   SETERRQ(1,"Function not yet written for SBAIJ format");
457   /* PetscFunctionReturn(0); */
458 }
459 
460 #undef __FUNC__
461 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ"
462 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
463 {
464   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
465   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
466   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
467 
468   PetscFunctionBegin;
469   for (i=0; i<m; i++) {
470     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
471     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
472     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
473       row = idxm[i] - bsrstart;
474       for (j=0; j<n; j++) {
475         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
476         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
477         if (idxn[j] >= bscstart && idxn[j] < bscend){
478           col = idxn[j] - bscstart;
479           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
480         } else {
481           if (!baij->colmap) {
482             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
483           }
484 #if defined (PETSC_USE_CTABLE)
485           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
486           data --;
487 #else
488           data = baij->colmap[idxn[j]/bs]-1;
489 #endif
490           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
491           else {
492             col  = data + idxn[j]%bs;
493             ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
494           }
495         }
496       }
497     } else {
498       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
499     }
500   }
501  PetscFunctionReturn(0);
502 }
503 
504 #undef __FUNC__
505 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ"
506 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
507 {
508   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
509   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
510   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
511   int        ierr;
512   PetscReal  sum[2],*lnorm2;
513 
514   PetscFunctionBegin;
515   if (baij->size == 1) {
516     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
517   } else {
    if (type == NORM_FROBENIUS) {
      lnorm2 = (PetscReal*)PetscMalloc(2*sizeof(PetscReal));CHKPTRQ(lnorm2);
      ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
524       /*
525       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
526       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
527       */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
529       /*
530       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
531       PetscSynchronizedFlush(PETSC_COMM_WORLD); */
532 
533       *norm = sqrt(sum[0] + 2*sum[1]);
534       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
535     } else {
536       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
537     }
538   }
539   PetscFunctionReturn(0);
540 }
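
/*
   Sketch of the Frobenius reduction above (assuming MatNorm() on the
   SeqSBAIJ diagonal block already accounts for its mirrored entries):
   each strictly off-diagonal block entry stored in B appears twice in
   the full symmetric matrix, so

        ||mat||_F^2 = sum_p ( ||A_p||_F^2 + 2*||B_p||_F^2 )

   which is exactly sqrt(sum[0] + 2*sum[1]) after the MPI_Allreduce.
*/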
541 
/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
549 #undef __FUNC__
550 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private"
551 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
552 {
553   PetscFunctionBegin;
554   SETERRQ(1,"Function not yet written for SBAIJ format");
555   /* PetscFunctionReturn(0); */
556 }
557 
558 #undef __FUNC__
559 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ"
560 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
561 {
562   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
563   int         ierr,nstash,reallocs;
564   InsertMode  addv;
565 
566   PetscFunctionBegin;
567   if (baij->donotstash) {
568     PetscFunctionReturn(0);
569   }
570 
571   /* make sure all processors are either in INSERTMODE or ADDMODE */
572   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
573   if (addv == (ADD_VALUES|INSERT_VALUES)) {
574     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
575   }
576   mat->insertmode = addv; /* in case this processor had no cache */
577 
578   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
579   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
584   PetscFunctionReturn(0);
585 }
586 
587 #undef __FUNC__
588 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ"
589 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
590 {
591   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
592   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
593   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
594   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
595   int         *row,*col,other_disassembled;
596   PetscTruth  r1,r2,r3;
597   MatScalar   *val;
598   InsertMode  addv = mat->insertmode;
599   int         rank;
600 
601   PetscFunctionBegin;
  /* remove the 2 lines below later */
603   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
604 
605   if (!baij->donotstash) {
606     while (1) {
607       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
608       /*
609       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
610       PetscSynchronizedFlush(PETSC_COMM_WORLD);
611       */
612       if (!flg) break;
613 
614       for (i=0; i<n;) {
615         /* Now identify the consecutive vals belonging to the same row */
616         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
617         if (j < n) ncols = j-i;
618         else       ncols = n-i;
619         /* Now assemble all these values with a single function call */
620         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
621         i = j;
622       }
623     }
624     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the roworiented flags, and after MatSetValues()
       restore the original flags */
628     r1 = baij->roworiented;
629     r2 = a->roworiented;
630     r3 = b->roworiented;
631     baij->roworiented = PETSC_FALSE;
632     a->roworiented    = PETSC_FALSE;
633     b->roworiented    = PETSC_FALSE;
634     while (1) {
635       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
636       if (!flg) break;
637 
638       for (i=0; i<n;) {
639         /* Now identify the consecutive vals belonging to the same row */
640         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
641         if (j < n) ncols = j-i;
642         else       ncols = n-i;
643         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
644         i = j;
645       }
646     }
647     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
648     baij->roworiented = r1;
649     a->roworiented    = r2;
650     b->roworiented    = r3;
651   }
652 
653   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
654   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
655 
  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
658   /*
659      if nonzero structure of submatrix B cannot change then we know that
660      no processor disassembled thus we can skip this stuff
661   */
662   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
663     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
664     if (mat->was_assembled && !other_disassembled) {
665       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
666     }
667   }
668 
669   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
670     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr);
671   }
672   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
673   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
674 
675 #if defined(PETSC_USE_BOPT_g)
676   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
677     PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
678     baij->ht_total_ct  = 0;
679     baij->ht_insert_ct = 0;
680   }
681 #endif
682   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
683     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
684     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
685     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
686   }
687 
688   if (baij->rowvalues) {
689     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
690     baij->rowvalues = 0;
691   }
692   PetscFunctionReturn(0);
693 }
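
/*
   Assembly sketch for the routine above: scalar entries destined for other
   processes were collected in mat->stash and block entries in mat->bstash
   during MatSetValues(); here each incoming message is split into runs of
   equal row index so that every run can be applied with one call to
   MatSetValues_MPISBAIJ_MatScalar() (or its blocked variant).
*/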
694 
695 #undef __FUNC__
696 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket"
697 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
698 {
699   Mat_MPISBAIJ  *baij = (Mat_MPISBAIJ*)mat->data;
700   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
701   PetscTruth   isascii,isdraw;
702   Viewer       sviewer;
703 
704   PetscFunctionBegin;
705   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
706   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
707   if (isascii) {
708     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
709     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
710       MatInfo info;
711       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
712       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
713       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
714               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
715               baij->bs,(int)info.memory);CHKERRQ(ierr);
716       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
717       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
718       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
719       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
720       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
721       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
722       PetscFunctionReturn(0);
723     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
724       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
725       PetscFunctionReturn(0);
726     }
727   }
728 
729   if (isdraw) {
730     Draw       draw;
731     PetscTruth isnull;
732     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
733     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
734   }
735 
736   if (size == 1) {
737     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
738   } else {
739     /* assemble the entire matrix onto first processor. */
740     Mat         A;
741     Mat_SeqSBAIJ *Aloc;
742     Mat_SeqBAIJ *Bloc;
743     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
744     MatScalar   *a;
745 
746     if (!rank) {
747       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
748     } else {
749       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
750     }
751     PLogObjectParent(mat,A);
752 
753     /* copy over the A part */
754     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
755     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
756     rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
757 
758     for (i=0; i<mbs; i++) {
759       rvals[0] = bs*(baij->rstart + i);
760       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
761       for (j=ai[i]; j<ai[i+1]; j++) {
762         col = (baij->cstart+aj[j])*bs;
763         for (k=0; k<bs; k++) {
764           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
765           col++; a += bs;
766         }
767       }
768     }
769     /* copy over the B part */
770     Bloc = (Mat_SeqBAIJ*)baij->B->data;
771     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
772     for (i=0; i<mbs; i++) {
773       rvals[0] = bs*(baij->rstart + i);
774       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
775       for (j=ai[i]; j<ai[i+1]; j++) {
776         col = baij->garray[aj[j]]*bs;
777         for (k=0; k<bs; k++) {
778           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
779           col++; a += bs;
780         }
781       }
782     }
783     ierr = PetscFree(rvals);CHKERRQ(ierr);
784     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
785     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
786     /*
787        Everyone has to call to draw the matrix since the graphics waits are
788        synchronized across all processors that share the Draw object
789     */
790     ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
791     if (!rank) {
792       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
793     }
794     ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
795     ierr = MatDestroy(A);CHKERRQ(ierr);
796   }
797   PetscFunctionReturn(0);
798 }
799 
800 #undef __FUNC__
801 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ"
802 int MatView_MPISBAIJ(Mat mat,Viewer viewer)
803 {
804   int        ierr;
805   PetscTruth isascii,isdraw,issocket,isbinary;
806 
807   PetscFunctionBegin;
808   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
809   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
810   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
811   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
812   if (isascii || isdraw || issocket || isbinary) {
813     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
814   } else {
815     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 #undef __FUNC__
821 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ"
822 int MatDestroy_MPISBAIJ(Mat mat)
823 {
824   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
825   int         ierr;
826 
827   PetscFunctionBegin;
828 
829   if (mat->mapping) {
830     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
831   }
832   if (mat->bmapping) {
833     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
834   }
835   if (mat->rmap) {
836     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
837   }
838   if (mat->cmap) {
839     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
840   }
841 #if defined(PETSC_USE_LOG)
842   PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
843 #endif
844 
845   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
846   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
847 
848   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
849   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
850   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
851 #if defined (PETSC_USE_CTABLE)
852   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
853 #else
854   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
855 #endif
856   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
857   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
858   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
859   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
860   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
861   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
862 #if defined(PETSC_USE_MAT_SINGLE)
863   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
864 #endif
865   ierr = PetscFree(baij);CHKERRQ(ierr);
866   PLogObjectDestroy(mat);
867   PetscHeaderDestroy(mat);
868   PetscFunctionReturn(0);
869 }
870 
871 #undef __FUNC__
872 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ"
873 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
874 {
875   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
876   int         ierr,nt;
877 
878   PetscFunctionBegin;
879   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
880   if (nt != A->n) {
881     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
882   }
883   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
884   if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
886   }
887 
888   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
889   /* do diagonal part */
890   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
892   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
893   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
894   /* do subdiagonal part */
895   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
896   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
897   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
898 
899   PetscFunctionReturn(0);
900 }
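
/*
   Sketch of the symmetric multiply above: with A_d the local diagonal block
   and B the stored upper triangular off-diagonal blocks,

        y = A_d*x_loc + B*x_ghost + (B^T*x_loc scatter-added to the owners)

   so the subdiagonal contributions, which are not stored explicitly, are
   supplied by applying B^T and reverse-scattering the result.
*/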
901 
902 #undef __FUNC__
903 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ"
904 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
905 {
906   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
907   int        ierr;
908 
909   PetscFunctionBegin;
910   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
911   /* do diagonal part */
912   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
914   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
915   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
916 
917   /* do subdiagonal part */
918   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
919   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
920   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
921 
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNC__
926 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ"
927 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
928 {
929   PetscFunctionBegin;
930   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
931   /* PetscFunctionReturn(0); */
932 }
933 
934 #undef __FUNC__
935 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ"
936 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
937 {
938   PetscFunctionBegin;
939   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
940   /* PetscFunctionReturn(0); */
941 }
942 
943 /*
944   This only works correctly for square matrices where the subblock A->A is the
945    diagonal block
946 */
947 #undef __FUNC__
948 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ"
949 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
950 {
951   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
952   int         ierr;
953 
954   PetscFunctionBegin;
955   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
956   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
957   PetscFunctionReturn(0);
958 }
959 
960 #undef __FUNC__
961 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ"
962 int MatScale_MPISBAIJ(Scalar *aa,Mat A)
963 {
964   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
965   int         ierr;
966 
967   PetscFunctionBegin;
968   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
969   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 #undef __FUNC__
974 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ"
975 int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n)
976 {
977   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
978 
979   PetscFunctionBegin;
980   if (m) *m = mat->rstart*mat->bs;
981   if (n) *n = mat->rend*mat->bs;
982   PetscFunctionReturn(0);
983 }
984 
985 #undef __FUNC__
986 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ"
987 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
988 {
989   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
990   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
991   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
992   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
993   int        *cmap,*idx_p,cstart = mat->cstart;
994 
995   PetscFunctionBegin;
996   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
997   mat->getrowactive = PETSC_TRUE;
998 
999   if (!mat->rowvalues && (idx || v)) {
1000     /*
1001         allocate enough space to hold information from the longest row.
1002     */
1003     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1004     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1005     int     max = 1,mbs = mat->mbs,tmp;
1006     for (i=0; i<mbs; i++) {
1007       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1008       if (max < tmp) { max = tmp; }
1009     }
1010     mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1011     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1012   }
1013 
  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1015   lrow = row - brstart;  /* local row index */
1016 
1017   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1018   if (!v)   {pvA = 0; pvB = 0;}
1019   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1020   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1021   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1022   nztot = nzA + nzB;
1023 
1024   cmap  = mat->garray;
1025   if (v  || idx) {
1026     if (nztot) {
1027       /* Sort by increasing column numbers, assuming A and B already sorted */
1028       int imark = -1;
1029       if (v) {
1030         *v = v_p = mat->rowvalues;
1031         for (i=0; i<nzB; i++) {
1032           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1033           else break;
1034         }
1035         imark = i;
1036         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1037         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1038       }
1039       if (idx) {
1040         *idx = idx_p = mat->rowindices;
1041         if (imark > -1) {
1042           for (i=0; i<imark; i++) {
1043             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1044           }
1045         } else {
1046           for (i=0; i<nzB; i++) {
1047             if (cmap[cworkB[i]/bs] < cstart)
1048               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1049             else break;
1050           }
1051           imark = i;
1052         }
1053         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1054         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1055       }
1056     } else {
1057       if (idx) *idx = 0;
1058       if (v)   *v   = 0;
1059     }
1060   }
1061   *nz = nztot;
1062   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1063   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
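
/*
   Sketch of the merge above: the off-diagonal columns of B whose global
   block-column lies below cstart come first, then all columns of the
   diagonal block A (shifted by cstart*bs), then the remaining columns of
   B; since A and B are individually sorted, the result is sorted by
   global column number.
*/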
1066 
1067 #undef __FUNC__
1068 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ"
1069 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1070 {
1071   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1072 
1073   PetscFunctionBegin;
1074   if (baij->getrowactive == PETSC_FALSE) {
1075     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1076   }
1077   baij->getrowactive = PETSC_FALSE;
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNC__
1082 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ"
1083 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1084 {
1085   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1086 
1087   PetscFunctionBegin;
1088   *bs = baij->bs;
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 #undef __FUNC__
1093 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ"
1094 int MatZeroEntries_MPISBAIJ(Mat A)
1095 {
1096   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1097   int         ierr;
1098 
1099   PetscFunctionBegin;
1100   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1101   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNC__
1106 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ"
1107 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1108 {
1109   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1110   Mat         A = a->A,B = a->B;
1111   int         ierr;
1112   PetscReal   isend[5],irecv[5];
1113 
1114   PetscFunctionBegin;
1115   info->block_size     = (double)a->bs;
1116   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1117   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1118   isend[3] = info->memory;  isend[4] = info->mallocs;
1119   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1120   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1121   isend[3] += info->memory;  isend[4] += info->mallocs;
1122   if (flag == MAT_LOCAL) {
1123     info->nz_used      = isend[0];
1124     info->nz_allocated = isend[1];
1125     info->nz_unneeded  = isend[2];
1126     info->memory       = isend[3];
1127     info->mallocs      = isend[4];
1128   } else if (flag == MAT_GLOBAL_MAX) {
1129     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1130     info->nz_used      = irecv[0];
1131     info->nz_allocated = irecv[1];
1132     info->nz_unneeded  = irecv[2];
1133     info->memory       = irecv[3];
1134     info->mallocs      = irecv[4];
1135   } else if (flag == MAT_GLOBAL_SUM) {
1136     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1137     info->nz_used      = irecv[0];
1138     info->nz_allocated = irecv[1];
1139     info->nz_unneeded  = irecv[2];
1140     info->memory       = irecv[3];
1141     info->mallocs      = irecv[4];
1142   } else {
1143     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1144   }
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->n;
1149   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1150   info->fill_ratio_needed = 0;
1151   info->factor_mallocs    = 0;
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNC__
1156 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ"
1157 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1158 {
1159   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1160   int         ierr;
1161 
1162   PetscFunctionBegin;
1163   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1164       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1165       op == MAT_COLUMNS_UNSORTED ||
1166       op == MAT_COLUMNS_SORTED ||
1167       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1168       op == MAT_KEEP_ZEROED_ROWS ||
1169       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1170         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1171         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1172   } else if (op == MAT_ROW_ORIENTED) {
1173         a->roworiented = PETSC_TRUE;
1174         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1175         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS ||
             op == MAT_USE_HASH_TABLE) { /* hash table not yet written for SBAIJ */
    PLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1183   } else if (op == MAT_COLUMN_ORIENTED) {
1184     a->roworiented = PETSC_FALSE;
1185     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1186     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1187   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1188     a->donotstash = PETSC_TRUE;
1189   } else if (op == MAT_NO_NEW_DIAGONALS) {
1190     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1193   } else {
1194     SETERRQ(PETSC_ERR_SUP,"unknown option");
1195   }
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 #undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ"
1201 int MatTranspose_MPISBAIJ(Mat A,Mat *matout)
1202 {
1203   PetscFunctionBegin;
1204   SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called");
1205   /* PetscFunctionReturn(0); */
1206 }
1207 
1208 #undef __FUNC__
1209 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ"
1210 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1211 {
1212   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1213   Mat         a = baij->A,b = baij->B;
1214   int         ierr,s1,s2,s3;
1215 
1216   PetscFunctionBegin;
  if (ll != rr) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same\n");
  }
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);

    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scale the off-diagonal block */
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

    /* scale the diagonal block */
    ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

    /* do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }
1239 
1240   PetscFunctionReturn(0);
1241 }
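
/*
   Scaling sketch for the routine above: with D = diag(ll) = diag(rr) the
   only scaling that preserves symmetry is the congruence

        mat <- D * mat * D

   so the diagonal block is scaled on both sides with the local part of D,
   while the off-diagonal block B is scaled on the left by the local
   entries and on the right by the scattered ghost entries of D.
*/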
1242 
1243 #undef __FUNC__
1244 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ"
1245 int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag)
1246 {
1247   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1248   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1249   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1250   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1251   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1252   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1253   MPI_Comm       comm = A->comm;
1254   MPI_Request    *send_waits,*recv_waits;
1255   MPI_Status     recv_status,*send_status;
1256   IS             istmp;
1257 
1258   PetscFunctionBegin;
1259   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1260   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1261 
1262   /*  first count number of contributors to each processor */
1263   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
1264   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1265   procs  = nprocs + size;
1266   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1267   for (i=0; i<N; i++) {
1268     idx   = rows[i];
1269     found = 0;
1270     for (j=0; j<size; j++) {
1271       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1272         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1273       }
1274     }
1275     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1276   }
1277   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1278 
1279   /* inform other processors of number of messages and max length*/
1280   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
1281   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1282   nmax   = work[rank];
1283   nrecvs = work[size+rank];
1284   ierr = PetscFree(work);CHKERRQ(ierr);
1285 
1286   /* post receives:   */
1287   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1288   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1289   for (i=0; i<nrecvs; i++) {
1290     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1291   }
1292 
1293   /* do sends:
1294      1) starts[i] gives the starting index in svalues for stuff going to
1295      the ith processor
1296   */
1297   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
1298   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1299   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
1300   starts[0]  = 0;
1301   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1302   for (i=0; i<N; i++) {
1303     svalues[starts[owner[i]]++] = rows[i];
1304   }
1305   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1306 
1307   starts[0] = 0;
1308   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1309   count = 0;
1310   for (i=0; i<size; i++) {
1311     if (procs[i]) {
1312       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1313     }
1314   }
1315   ierr = PetscFree(starts);CHKERRQ(ierr);
1316 
1317   base = owners[rank]*bs;
1318 
1319   /*  wait on receives */
1320   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
1321   source = lens + nrecvs;
1322   count  = nrecvs; slen = 0;
1323   while (count) {
1324     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1325     /* unpack receives into our local space */
1326     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1327     source[imdex]  = recv_status.MPI_SOURCE;
1328     lens[imdex]    = n;
1329     slen          += n;
1330     count--;
1331   }
1332   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1333 
1334   /* move the data into the send scatter */
1335   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
1336   count = 0;
1337   for (i=0; i<nrecvs; i++) {
1338     values = rvalues + i*nmax;
1339     for (j=0; j<lens[i]; j++) {
1340       lrows[count++] = values[j] - base;
1341     }
1342   }
1343   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1344   ierr = PetscFree(lens);CHKERRQ(ierr);
1345   ierr = PetscFree(owner);CHKERRQ(ierr);
1346   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1347 
1348   /* actually zap the local rows */
1349   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1350   PLogObjectParent(A,istmp);
1351 
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory and thus slowing things down with many mallocs.

       Contributed by: Mathew Knepley
  */
1360   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1361   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1362   if (diag && (l->A->M == l->A->N)) {
1363     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1364   } else if (diag) {
1365     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1366     if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
1367       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1368 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1369     }
1370     for (i=0; i<slen; i++) {
1371       row  = lrows[i] + rstart_bs;
1372       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1373     }
1374     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1375     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1376   } else {
1377     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1378   }
1379 
1380   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1381   ierr = PetscFree(lrows);CHKERRQ(ierr);
1382 
1383   /* wait on sends */
1384   if (nsends) {
1385     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1386     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1387     ierr        = PetscFree(send_status);CHKERRQ(ierr);
1388   }
1389   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1390   ierr = PetscFree(svalues);CHKERRQ(ierr);
1391 
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 #undef __FUNC__
#define __FUNC__ /*<a name="MatPrintHelp_MPISBAIJ"></a>*/"MatPrintHelp_MPISBAIJ"
1397 int MatPrintHelp_MPISBAIJ(Mat A)
1398 {
1399   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1400   MPI_Comm    comm = A->comm;
1401   static int  called = 0;
1402   int         ierr;
1403 
1404   PetscFunctionBegin;
1405   if (!a->rank) {
1406     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1407   }
1408   if (called) {PetscFunctionReturn(0);} else called = 1;
1409   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1410   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNC__
#define __FUNC__ /*<a name="MatSetUnfactored_MPISBAIJ"></a>*/"MatSetUnfactored_MPISBAIJ"
1416 int MatSetUnfactored_MPISBAIJ(Mat A)
1417 {
1418   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1419   int         ierr;
1420 
1421   PetscFunctionBegin;
1422   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1427 
1428 #undef __FUNC__
#define __FUNC__ /*<a name="MatEqual_MPISBAIJ"></a>*/"MatEqual_MPISBAIJ"
1430 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1431 {
1432   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1433   Mat         a,b,c,d;
1434   PetscTruth  flg;
1435   int         ierr;
1436 
1437   PetscFunctionBegin;
1438   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr);
1439   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1440   a = matA->A; b = matA->B;
1441   c = matB->A; d = matB->B;
1442 
1443   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1444   if (flg == PETSC_TRUE) {
1445     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1446   }
1447   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 /* -------------------------------------------------------------------*/
1452 static struct _MatOps MatOps_Values = {
1453   MatSetValues_MPISBAIJ,
1454   MatGetRow_MPISBAIJ,
1455   MatRestoreRow_MPISBAIJ,
1456   MatMult_MPISBAIJ,
1457   MatMultAdd_MPISBAIJ,
1458   MatMultTranspose_MPISBAIJ,
1459   MatMultTransposeAdd_MPISBAIJ,
1460   0,
1461   0,
1462   0,
1463   0,
1464   0,
1465   0,
1466   0,
1467   MatTranspose_MPISBAIJ,
1468   MatGetInfo_MPISBAIJ,
1469   MatEqual_MPISBAIJ,
1470   MatGetDiagonal_MPISBAIJ,
1471   MatDiagonalScale_MPISBAIJ,
1472   MatNorm_MPISBAIJ,
1473   MatAssemblyBegin_MPISBAIJ,
1474   MatAssemblyEnd_MPISBAIJ,
1475   0,
1476   MatSetOption_MPISBAIJ,
1477   MatZeroEntries_MPISBAIJ,
1478   MatZeroRows_MPISBAIJ,
1479   0,
1480   0,
1481   0,
1482   0,
1483   0,
1484   0,
1485   MatGetOwnershipRange_MPISBAIJ,
1486   0,
1487   0,
1488   0,
1489   0,
1490   MatDuplicate_MPISBAIJ,
1491   0,
1492   0,
1493   0,
1494   0,
1495   0,
1496   MatGetSubMatrices_MPISBAIJ,
1497   MatIncreaseOverlap_MPISBAIJ,
1498   MatGetValues_MPISBAIJ,
1499   0,
1500   MatPrintHelp_MPISBAIJ,
1501   MatScale_MPISBAIJ,
1502   0,
1503   0,
1504   0,
1505   MatGetBlockSize_MPISBAIJ,
1506   0,
1507   0,
1508   0,
1509   0,
1510   0,
1511   0,
1512   MatSetUnfactored_MPISBAIJ,
1513   0,
1514   MatSetValuesBlocked_MPISBAIJ,
1515   0,
1516   0,
1517   0,
1518   MatGetMaps_Petsc,
1519   0,
1520   0,
1521   0,
1522   0,
1523   0,
1524   0,
1525   MatGetRowMax_MPISBAIJ};
1526 
1527 
1528 EXTERN_C_BEGIN
1529 #undef __FUNC__
#define __FUNC__ /*<a name="MatGetDiagonalBlock_MPISBAIJ"></a>*/"MatGetDiagonalBlock_MPISBAIJ"
1531 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1532 {
1533   PetscFunctionBegin;
1534   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1535   *iscopy = PETSC_FALSE;
1536   PetscFunctionReturn(0);
1537 }
1538 EXTERN_C_END
1539 
1540 #undef __FUNC__
#define __FUNC__ /*<a name="MatCreate_MPISBAIJ"></a>*/"MatCreate_MPISBAIJ"
1542 int MatCreate_MPISBAIJ(Mat B)
1543 {
1544   Mat_MPISBAIJ *b;
1545   int          ierr;
1546   PetscTruth   flg;
1547 
1548   PetscFunctionBegin;
1549   if (B->M != B->N || B->m != B->n){ /* N and n are not used after this */
1550     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, set M=N and m=n");
1551   }
1552 
1553   B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b);
1554   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1555   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1556 
1557   B->ops->destroy    = MatDestroy_MPISBAIJ;
1558   B->ops->view       = MatView_MPISBAIJ;
1559   B->mapping    = 0;
1560   B->factor     = 0;
1561   B->assembled  = PETSC_FALSE;
1562 
1563   B->insertmode = NOT_SET_VALUES;
1564   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1565   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1566 
1567   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1568   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1569 
1570   /* the information in the maps duplicates the information computed below, eventually
1571      we should remove the duplicate information that is not contained in the maps */
1572   ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1573   ierr = MapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1574 
1575   /* build local table of row and column ownerships */
1576   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1577   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1578 
1579   /* build cache for off array entries formed */
1580   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1581   b->donotstash  = PETSC_FALSE;
1582   b->colmap      = PETSC_NULL;
1583   b->garray      = PETSC_NULL;
1584   b->roworiented = PETSC_TRUE;
1585 
#if defined(PETSC_USE_MAT_SINGLE)
1587   /* stuff for MatSetValues_XXX in single precision */
1588   b->lensetvalues     = 0;
1589   b->setvaluescopy    = PETSC_NULL;
1590 #endif
1591 
1592   /* stuff used in block assembly */
1593   b->barray       = 0;
1594 
1595   /* stuff used for matrix vector multiply */
1596   b->lvec         = 0;
1597   b->Mvctx        = 0;
1598 
1599   /* stuff for MatGetRow() */
1600   b->rowindices   = 0;
1601   b->rowvalues    = 0;
1602   b->getrowactive = PETSC_FALSE;
1603 
1604   /* hash table stuff */
1605   b->ht           = 0;
1606   b->hd           = 0;
1607   b->ht_size      = 0;
1608   b->ht_flag      = PETSC_FALSE;
1609   b->ht_fact      = 0;
1610   b->ht_total_ct  = 0;
1611   b->ht_insert_ct = 0;
1612 
1613   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1614   if (flg) {
1615     double fact = 1.39;
1616     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1617     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1618     if (fact <= 1.0) fact = 1.39;
1619     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPISBAIJ:Hash table factor used %5.2f\n",fact);
1621   }
1622   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1623                                      "MatStoreValues_MPISBAIJ",
1624                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1626                                      "MatRetrieveValues_MPISBAIJ",
1627                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1629                                      "MatGetDiagonalBlock_MPISBAIJ",
1630                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1631   PetscFunctionReturn(0);
1632 }
1633 
1634 #undef __FUNC__
#define __FUNC__ /*<a name="MatMPISBAIJSetPreallocation"></a>*/"MatMPISBAIJSetPreallocation"
1636 /*@C
1637    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1638    the user should preallocate the matrix storage by setting the parameters
1639    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1640    performance can be increased by more than a factor of 50.
1641 
1642    Collective on Mat
1643 
1644    Input Parameters:
1645 +  A - the matrix
.  bs   - size of block
1647 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1648            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1652 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1653            submatrix (same for all local rows).
1654 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1655            off-diagonal portion of the local submatrix (possibly different for
1656            each block row) or PETSC_NULL.
1657 
1658 
1659    Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
-   -mat_block_size - size of the blocks to use
1663 
1664    Notes:
1665 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1668 
1669    Storage Information:
1670    For a square global matrix we define each processor's diagonal portion
1671    to be its local rows and the corresponding columns (a square submatrix);
1672    each processor's off-diagonal portion encompasses the remainder of the
1673    local matrix (a rectangular submatrix).
1674 
1675    The user can specify preallocated storage for the diagonal part of
1676    the local submatrix with either d_nz or d_nnz (not both).  Set
1677    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1678    memory allocation.  Likewise, specify preallocated storage for the
1679    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1680 
1681    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1682    the figure below we depict these three local rows and all columns (0-11).
1683 
1684 .vb
1685            0 1 2 3 4 5 6 7 8 9 10 11
1686           -------------------
1687    row 3  |  o o o d d d o o o o o o
1688    row 4  |  o o o d d d o o o o o o
1689    row 5  |  o o o d d d o o o o o o
1690           -------------------
1691 .ve
1692 
1693    Thus, any entries in the d locations are stored in the d (diagonal)
1694    submatrix, and any entries in the o locations are stored in the
1695    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1696    stored simply in the MATSEQBAIJ format for compressed row storage.
1697 
1698    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1699    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1700    In general, for PDE problems in which most nonzeros are near the diagonal,
1701    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1702    or you will get TERRIBLE performance; see the users' manual chapter on
1703    matrices.
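
   As a sketch (assuming Mat A, MPI_Comm comm, and a local size m are
   already declared; the counts are illustrative only), a matrix of 2x2
   blocks with at most 3 block nonzeros per block row in the diagonal
   portion and 1 in the off-diagonal portion could be preallocated as

.vb
   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);
   ierr = MatSetType(A,MATMPISBAIJ);
   ierr = MatMPISBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);
.ve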
1704 
1705    Level: intermediate
1706 
1707 .keywords: matrix, block, aij, compressed row, sparse, parallel
1708 
1709 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1710 @*/
1711 
1712 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1713 {
1714   Mat_MPISBAIJ *b;
1715   int          ierr,i,mbs,Mbs=PETSC_DECIDE;
1716 
1717   PetscFunctionBegin;
1718   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1719 
1720   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1721   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -2: value %d",d_nz);
1722   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -2: value %d",o_nz);
1723   if (d_nnz) {
1724     for (i=0; i<B->m/bs; i++) {
1725       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1726     }
1727   }
1728   if (o_nnz) {
1729     for (i=0; i<B->m/bs; i++) {
1730       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1731     }
1732   }
1733 
1734   b   = (Mat_MPISBAIJ*)B->data;
1735   mbs = B->m/bs;
1736   Mbs = B->M/bs;
1737   if (mbs*bs != B->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Number of local rows/cols must be divisible by the block size");
1739   }
1740 
1741   b->bs  = bs;
1742   b->bs2 = bs*bs;
1743   b->mbs = mbs;
1744   b->nbs = mbs;
1745   b->Mbs = Mbs;
1746   b->Nbs = Mbs;
1747 
1748   b->cowners    = b->rowners + b->size + 2;
1749   b->rowners_bs = b->cowners + b->size + 2;
1750   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1751   b->rowners[0]    = 0;
1752   for (i=2; i<=b->size; i++) {
1753     b->rowners[i] += b->rowners[i-1];
1754   }
1755   b->rstart    = b->rowners[b->rank];
1756   b->rend      = b->rowners[b->rank+1];
1757   b->cstart    = b->rstart;
1758   b->cend      = b->rend;
1759   for (i=0; i<=b->size; i++) {
1760     b->rowners_bs[i] = b->rowners[i]*bs;
1761   }
1762   b->rstart_bs = b->rstart * bs;
1763   b->rend_bs   = b->rend * bs;
1764 
1765   b->cstart_bs = b->cstart * bs;
1766   b->cend_bs   = b->cend * bs;
1767 
1768 
1769   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1770   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1771   PLogObjectParent(B,b->A);
1772   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1773   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1774   PLogObjectParent(B,b->B);
1775 
1776   /* build cache for off array entries formed */
1777   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1778 
1779   PetscFunctionReturn(0);
1780 }
1781 
1782 #undef __FUNC__
#define __FUNC__ /*<a name="MatCreateMPISBAIJ"></a>*/"MatCreateMPISBAIJ"
1784 /*@C
1785    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1786    (block compressed row).  For good matrix assembly performance
1787    the user should preallocate the matrix storage by setting the parameters
1788    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1789    performance can be increased by more than a factor of 50.
1790 
1791    Collective on MPI_Comm
1792 
1793    Input Parameters:
1794 +  comm - MPI communicator
.  bs   - size of block
1796 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1797            This value should be the same as the local size used in creating the
1798            y vector for the matrix-vector product y = Ax.
1799 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1800            This value should be the same as the local size used in creating the
1801            x vector for the matrix-vector product y = Ax.
1802 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1803 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1804 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1805            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1809 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1810            submatrix (same for all local rows).
1811 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1812            off-diagonal portion of the local submatrix (possibly different for
1813            each block row) or PETSC_NULL.
1814 
1815    Output Parameter:
1816 .  A - the matrix
1817 
1818    Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
-   -mat_mpi - use the parallel matrix data structures even on one processor
1823                (defaults to using SeqBAIJ format on one processor)
1824 
1825    Notes:
1826    The user MUST specify either the local or global matrix dimensions
1827    (possibly both).
1828 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1831 
1832    Storage Information:
1833    For a square global matrix we define each processor's diagonal portion
1834    to be its local rows and the corresponding columns (a square submatrix);
1835    each processor's off-diagonal portion encompasses the remainder of the
1836    local matrix (a rectangular submatrix).
1837 
1838    The user can specify preallocated storage for the diagonal part of
1839    the local submatrix with either d_nz or d_nnz (not both).  Set
1840    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1841    memory allocation.  Likewise, specify preallocated storage for the
1842    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1843 
1844    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1845    the figure below we depict these three local rows and all columns (0-11).
1846 
1847 .vb
1848            0 1 2 3 4 5 6 7 8 9 10 11
1849           -------------------
1850    row 3  |  o o o d d d o o o o o o
1851    row 4  |  o o o d d d o o o o o o
1852    row 5  |  o o o d d d o o o o o o
1853           -------------------
1854 .ve
1855 
1856    Thus, any entries in the d locations are stored in the d (diagonal)
1857    submatrix, and any entries in the o locations are stored in the
1858    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1859    stored simply in the MATSEQBAIJ format for compressed row storage.
1860 
1861    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1862    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1863    In general, for PDE problems in which most nonzeros are near the diagonal,
1864    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1865    or you will get TERRIBLE performance; see the users' manual chapter on
1866    matrices.
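
   As a sketch (assuming Mat A, MPI_Comm comm, and a local size m are
   already declared; the counts are illustrative only), a block
   tridiagonal-style matrix with 2x2 blocks might be created directly with

.vb
   ierr = MatCreateMPISBAIJ(comm,2,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                            3,PETSC_NULL,1,PETSC_NULL,&A);
.ve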
1867 
1868    Level: intermediate
1869 
1870 .keywords: matrix, block, aij, compressed row, sparse, parallel
1871 
1872 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1873 @*/
1874 
1875 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1876 {
1877   int ierr;
1878 
1879   PetscFunctionBegin;
1880   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1881   ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1882   ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 
1887 #undef __FUNC__
#define __FUNC__ /*<a name="MatDuplicate_MPISBAIJ"></a>*/"MatDuplicate_MPISBAIJ"
1889 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1890 {
1891   Mat          mat;
1892   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1893   int          ierr,len=0;
1894 
1895   PetscFunctionBegin;
1896   *newmat       = 0;
1897   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1898   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
1899   a = (Mat_MPISBAIJ*)mat->data;
1900   a->bs  = oldmat->bs;
1901   a->bs2 = oldmat->bs2;
1902   a->mbs = oldmat->mbs;
1903   a->nbs = oldmat->nbs;
1904   a->Mbs = oldmat->Mbs;
1905   a->Nbs = oldmat->Nbs;
1906 
1907   a->rstart       = oldmat->rstart;
1908   a->rend         = oldmat->rend;
1909   a->cstart       = oldmat->cstart;
1910   a->cend         = oldmat->cend;
1911   a->size         = oldmat->size;
1912   a->rank         = oldmat->rank;
1913   a->donotstash   = oldmat->donotstash;
1914   a->roworiented  = oldmat->roworiented;
1915   a->rowindices   = 0;
1916   a->rowvalues    = 0;
1917   a->getrowactive = PETSC_FALSE;
1918   a->barray       = 0;
1919   a->rstart_bs    = oldmat->rstart_bs;
1920   a->rend_bs      = oldmat->rend_bs;
1921   a->cstart_bs    = oldmat->cstart_bs;
1922   a->cend_bs      = oldmat->cend_bs;
1923 
1924   /* hash table stuff */
1925   a->ht           = 0;
1926   a->hd           = 0;
1927   a->ht_size      = 0;
1928   a->ht_flag      = oldmat->ht_flag;
1929   a->ht_fact      = oldmat->ht_fact;
1930   a->ht_total_ct  = 0;
1931   a->ht_insert_ct = 0;
1932 
1933   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1934   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1935   a->cowners    = a->rowners + a->size + 2;
1936   a->rowners_bs = a->cowners + a->size + 2;
1937   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1938   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1939   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
1940   if (oldmat->colmap) {
1941 #if defined (PETSC_USE_CTABLE)
1942   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1943 #else
1944     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
1945     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
1946     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
1947 #endif
1948   } else a->colmap = 0;
1949   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1950     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
1951     PLogObjectMemory(mat,len*sizeof(int));
1952     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
1953   } else a->garray = 0;
1954 
1955   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1956   PLogObjectParent(mat,a->lvec);
1957   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1958 
1959   PLogObjectParent(mat,a->Mvctx);
1960   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1961   PLogObjectParent(mat,a->A);
1962   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1963   PLogObjectParent(mat,a->B);
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1965   *newmat = mat;
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 #include "petscsys.h"
1970 
1971 #undef __FUNC__
#define __FUNC__ /*<a name="MatLoad_MPISBAIJ"></a>*/"MatLoad_MPISBAIJ"
1973 int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat)
1974 {
1975   Mat          A;
1976   int          i,nz,ierr,j,rstart,rend,fd;
1977   Scalar       *vals,*buf;
1978   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1979   MPI_Status   status;
1980   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
1981   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
1982   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
1983   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
1984   int          dcount,kmax,k,nzcount,tmp;
1985 
1986   PetscFunctionBegin;
1987   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1988 
1989   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1990   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1991   if (!rank) {
1992     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1993     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1994     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1995     if (header[3] < 0) {
1996       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
1997     }
1998   }
1999 
2000   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2001   M = header[1]; N = header[2];
2002 
2003   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2004 
2005   /*
2006      This code adds extra rows to make sure the number of rows is
2007      divisible by the blocksize
2008   */
2009   Mbs        = M/bs;
2010   extra_rows = bs - M + bs*(Mbs);
2011   if (extra_rows == bs) extra_rows = 0;
2012   else                  Mbs++;
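  /* e.g. with M = 10 and bs = 4: Mbs = 10/4 = 2 and
     extra_rows = 4 - 10 + 8 = 2, so Mbs becomes 3 and the matrix is
     padded to 12 rows; the padded rows get a unit diagonal entry below */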
  if (extra_rows && !rank) {
2014     PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2015   }
2016 
2017   /* determine ownership of all rows */
2018   mbs = Mbs/size + ((Mbs % size) > rank);
2019   m   = mbs * bs;
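  /* e.g. Mbs = 10 block rows on size = 3 processes gives mbs = 4,3,3 on
     ranks 0,1,2: each of the first (Mbs % size) ranks carries one extra
     block row */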
2020   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2021   browners = rowners + size + 1;
2022   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2023   rowners[0] = 0;
2024   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2025   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2026   rstart = rowners[rank];
2027   rend   = rowners[rank+1];
2028 
2029   /* distribute row lengths to all processors */
2030   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
2031   if (!rank) {
2032     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2033     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2034     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2035     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2036     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2037     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2038     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2039   } else {
2040     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2041   }
2042 
2043   if (!rank) {   /* procs[0] */
2044     /* calculate the number of nonzeros on each processor */
2045     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2046     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2047     for (i=0; i<size; i++) {
2048       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2049         procsnz[i] += rowlengths[j];
2050       }
2051     }
2052     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2053 
2054     /* determine max buffer needed and allocate it */
2055     maxnz = 0;
2056     for (i=0; i<size; i++) {
2057       maxnz = PetscMax(maxnz,procsnz[i]);
2058     }
2059     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2060 
2061     /* read in my part of the matrix column indices  */
2062     nz = procsnz[0];
2063     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2064     mycols = ibuf;
2065     if (size == 1)  nz -= extra_rows;
2066     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2067     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2068 
    /* read in everyone else's portion (except the last proc's) and ship it off */
2070     for (i=1; i<size-1; i++) {
2071       nz   = procsnz[i];
2072       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2073       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2074     }
2075     /* read in the stuff for the last proc */
2076     if (size != 1) {
2077       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2078       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2079       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2080       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2081     }
2082     ierr = PetscFree(cols);CHKERRQ(ierr);
2083   } else {  /* procs[i], i>0 */
2084     /* determine buffer space needed for message */
2085     nz = 0;
2086     for (i=0; i<m; i++) {
2087       nz += locrowlens[i];
2088     }
2089     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2090     mycols = ibuf;
2091     /* receive message of column indices*/
2092     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2093     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Message length does not match the expected number of column indices");
2095   }
2096 
2097   /* loop over local rows, determining number of off diagonal entries */
2098   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2099   odlens = dlens + (rend-rstart);
2100   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2101   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2102   masked1 = mask    + Mbs;
2103   masked2 = masked1 + Mbs;
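  /* mask[] flags the block columns already counted in the current block
     row; masked1[]/masked2[] record which flags were set so the reset at
     the bottom of the loop costs O(row length) instead of O(Mbs) */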
2104   rowcount = 0; nzcount = 0;
2105   for (i=0; i<mbs; i++) {
2106     dcount  = 0;
2107     odcount = 0;
2108     for (j=0; j<bs; j++) {
2109       kmax = locrowlens[rowcount];
2110       for (k=0; k<kmax; k++) {
2111         tmp = mycols[nzcount++]/bs; /* block col. index */
2112         if (!mask[tmp]) {
2113           mask[tmp] = 1;
2114           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2115           else masked1[dcount++] = tmp; /* entry in diag portion */
2116         }
2117       }
2118       rowcount++;
2119     }
2120 
2121     dlens[i]  = dcount;  /* d_nzz[i] */
2122     odlens[i] = odcount; /* o_nzz[i] */
2123 
2124     /* zero out the mask elements we set */
2125     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2126     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2127   }
2128 
2129   /* create our matrix */
  ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2134 
2135   if (!rank) {
2136     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
2137     /* read in my part of the matrix numerical values  */
2138     nz = procsnz[0];
2139     vals = buf;
2140     mycols = ibuf;
2141     if (size == 1)  nz -= extra_rows;
2142     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2143     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2144 
2145     /* insert into matrix */
2146     jj      = rstart*bs;
2147     for (i=0; i<m; i++) {
2148       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2149       mycols += locrowlens[i];
2150       vals   += locrowlens[i];
2151       jj++;
2152     }
2153 
2154     /* read in other processors (except the last one) and ship out */
2155     for (i=1; i<size-1; i++) {
2156       nz   = procsnz[i];
2157       vals = buf;
2158       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2159       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2160     }
2161     /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2164       vals = buf;
2165       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2166       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2167       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2168     }
2169     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2170 
2171   } else {
2172     /* receive numeric values */
2173     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
2174 
2175     /* receive message of values*/
2176     vals   = buf;
2177     mycols = ibuf;
2178     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2179     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Message length does not match the expected number of nonzero values");
2181 
2182     /* insert into matrix */
2183     jj      = rstart*bs;
2184     for (i=0; i<m; i++) {
2185       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2186       mycols += locrowlens[i];
2187       vals   += locrowlens[i];
2188       jj++;
2189     }
2190   }
2191 
2192   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2193   ierr = PetscFree(buf);CHKERRQ(ierr);
2194   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2195   ierr = PetscFree(rowners);CHKERRQ(ierr);
2196   ierr = PetscFree(dlens);CHKERRQ(ierr);
2197   ierr = PetscFree(mask);CHKERRQ(ierr);
2198   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2199   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 #undef __FUNC__
#define __FUNC__ /*<a name="MatMPISBAIJSetHashTableFactor"></a>*/"MatMPISBAIJSetHashTableFactor"
2205 /*@
2206    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2207 
2208    Input Parameters:
2209 .  mat  - the matrix
2210 .  fact - factor
2211 
2212    Collective on Mat
2213 
2214    Level: advanced
2215 
2216   Notes:
2217    This can also be set by the command line option: -mat_use_hash_table fact
2218 
2219 .keywords: matrix, hashtable, factor, HT
2220 
2221 .seealso: MatSetOption()
2222 @*/
2223 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2224 {
2225   PetscFunctionBegin;
2226   SETERRQ(1,"Function not yet written for SBAIJ format");
2227   /* PetscFunctionReturn(0); */
2228 }
2229 
2230 #undef __FUNC__
#define __FUNC__ /*<a name="MatGetRowMax_MPISBAIJ"></a>*/"MatGetRowMax_MPISBAIJ"
2232 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2233 {
2234   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2235   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2236   PetscReal    atmp;
  PetscReal    *work,*svalues,*rvalues;
2238   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2239   int          rank,size,*rowners_bs,dest,count,source;
2240   Scalar       *ba,*va;
2241   MPI_Status   stat;
2242 
2243   PetscFunctionBegin;
2244   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2245   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
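  /* a->A supplies the row maxima of the diagonal block; since SBAIJ
     stores only the upper triangle, each entry of the off-diagonal
     block B below updates both its own row (va) and, by symmetry, the
     row owning its column, which is accumulated in work[] and shipped
     to the owning processor afterwards */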
2246 
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2249 
2250   bs   = a->bs;
2251   mbs  = a->mbs;
2252   Mbs  = a->Mbs;
2253   ba   = b->a;
2254   bi   = b->i;
2255   bj   = b->j;
2256   /*
2257   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2258   PetscSynchronizedFlush(PETSC_COMM_WORLD);
2259   */
2260 
2261   /* find ownerships */
2262   rowners_bs = a->rowners_bs;
2263   /*
2264   for (i=0; i<size+1; i++) {
2265     PetscPrintf(PETSC_COMM_SELF,"rowners_bs: %d\n",i,rowners_bs[i]);
2266   }
2267   */
2268 
2269   /* each proc creates an array to be distributed */
2270   work  = (PetscReal*)PetscMalloc(bs*Mbs*sizeof(PetscReal));CHKPTRQ(work);
2271   ierr  = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2272 
2273   /* row_max for B */
2274   for (i=0; i<mbs; i++) {
2275     ncols = bi[1] - bi[0]; bi++;
2276     brow  = bs*i;
2277     for (j=0; j<ncols; j++){
2278       bcol = bs*(*bj);
2279       for (kcol=0; kcol<bs; kcol++){
2280         col = bcol + kcol;                 /* local col index */
2281         col += rowners_bs[rank] + bs;      /* global col index */
2282         /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2283         for (krow=0; krow<bs; krow++){
2284           atmp = PetscAbsScalar(*ba); ba++;
2285           row = brow + krow;    /* local row index */
2286           /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2287           if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2288           if (work[col] < atmp) work[col] = atmp;
2289         }
2290       }
2291       bj++;
2292     }
2293   }
2294 
2295   /* send values to its owners */
2296   if (rank != size-1){
2297     for (dest=rank+1; dest<size; dest++){
2298       svalues = work + rowners_bs[dest];
2299       count = rowners_bs[dest+1]-rowners_bs[dest];
      ierr = MPI_Send(svalues,count,MPI_DOUBLE,dest,rank,A->comm);CHKERRQ(ierr);
2301       /*
2302       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]\n",rank,count,dest);
2303       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2304       */
2305     }
2306   }
2307 
2308   /* receive values */
2309   if (rank){
2310     rvalues = work;
2311     count = rowners_bs[rank+1]-rowners_bs[rank];
2312     for (source=0; source<rank; source++){
      ierr = MPI_Recv(rvalues,count,MPI_DOUBLE,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2314       /* process values */
2315       for (i=0; i<count; i++){
2316         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2317       }
2318       /*
2319       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received from [%d] \n",rank,stat.MPI_SOURCE);
2320       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2321       */
2322     }
2323   }
2324 
2325   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
2327   PetscFunctionReturn(0);
2328 }
2329