xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 29bbc08cd5461c366aba6645924e8ff42acd1de0)
1 /*$Id: mpisbaij.c,v 1.23 2000/09/27 21:14:30 balay Exp bsmith $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int DisAssemble_MPISBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqSBAIJ(Mat);
18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*);
19 extern int MatZeroRows_SeqBAIJ(Mat,IS,Scalar *);
20 
/*  UGLY, ugly, ugly
   When MatScalar == Scalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries into single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
28 #if defined(PETSC_USE_MAT_SINGLE)
29 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
32 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
33 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
34 #else
35 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
36 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
37 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
38 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
39 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
40 #endif
41 
42 EXTERN_C_BEGIN
43 #undef __FUNC__
44 #define __FUNC__ /*<a name="MatStoreValues_MPISBAIJ"></a>*/"MatStoreValues_MPISBAIJ"
45 int MatStoreValues_MPISBAIJ(Mat mat)
46 {
47   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
48   int         ierr;
49 
50   PetscFunctionBegin;
51   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
52   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 EXTERN_C_END
56 
57 EXTERN_C_BEGIN
58 #undef __FUNC__
59 #define __FUNC__ /*<a name="MatRetrieveValues_MPISBAIJ"></a>*/"MatRetrieveValues_MPISBAIJ"
60 int MatRetrieveValues_MPISBAIJ(Mat mat)
61 {
62   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
63   int         ierr;
64 
65   PetscFunctionBegin;
66   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
67   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
68   PetscFunctionReturn(0);
69 }
70 EXTERN_C_END
71 
72 /*
73      Local utility routine that creates a mapping from the global column
74    number to the local number in the off-diagonal part of the local
75    storage of the matrix.  This is done in a non scable way since the
76    length of colmap equals the global matrix length.
77 */
78 #undef __FUNC__
79 #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private"
static int CreateColmap_MPISBAIJ_Private(Mat mat)
{
  /* Placeholder: always raises an error.  Callers (MatSetValues/MatGetValues
     paths that look up off-diagonal columns after assembly) cannot work until
     this is implemented for the SBAIJ format. */
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
86 
/* number of block entries by which a full row is enlarged on reallocation */
#define CHUNKSIZE  10

/*
   MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) - inserts or adds one
   scalar entry at local (row,col) into the diagonal block A, reallocating the
   whole storage (adding CHUNKSIZE block slots to this row) when the row is
   full.  Implemented as a macro: it uses the caller's locals (rp,ap,ai,aj,aa,
   ailen,aimax,bs,bs2,brow,bcol,ridx,cidx,low,high,t,_i,ii,N,nrow,rmax,bap,
   ierr) declared in MatSetValues_MPISBAIJ_MatScalar().
*/
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      /* binary search narrows to a window of <= 3 entries, then scan */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: values, column indices and row pointers in one chunk */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots, leaving a CHUNKSIZE gap after this row */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        /* bug fix: was sizeof(Scalar); new_a holds MatScalar, and with */ \
        /* PETSC_USE_MAT_SINGLE sizeof(Scalar) > sizeof(MatScalar) so the */ \
        /* zeroing overran the value array into the new_j index data */ \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->s_maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        /* NOTE(review): s_nz is only bumped on reallocation here (the B macro */ \
        /* mirrors this with b->nz) -- confirm against SeqSBAIJ bookkeeping */ \
        a->s_nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
/*
   MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) - same insertion logic
   as the A macro above, but operating on the off-diagonal block B (locals
   rp,ap alias into bj/ba, bookkeeping in b->nz / b->maxnz).
   Bug fix: the include guard previously tested the BAIJ name
   (MatSetValues_SeqBAIJ_B_Private) and so never matched the macro it guards.
*/
#ifndef MatSetValues_SeqSBAIJ_B_Private
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      /* binary search narrows to a window of <= 3 entries, then scan */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: values, column indices and row pointers in one chunk */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots, leaving a CHUNKSIZE gap after this row */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif
242 
243 #if defined(PETSC_USE_MAT_SINGLE)
244 #undef __FUNC__
245 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ"
246 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
247 {
248   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
249   int         ierr,i,N = m*n;
250   MatScalar   *vsingle;
251 
252   PetscFunctionBegin;
253   if (N > b->setvalueslen) {
254     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
255     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
256     b->setvalueslen  = N;
257   }
258   vsingle = b->setvaluescopy;
259 
260   for (i=0; i<N; i++) {
261     vsingle[i] = v[i];
262   }
263   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
264   PetscFunctionReturn(0);
265 }
266 
267 #undef __FUNC__
268 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
269 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
270 {
271   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
272   int         ierr,i,N = m*n*b->bs2;
273   MatScalar   *vsingle;
274 
275   PetscFunctionBegin;
276   if (N > b->setvalueslen) {
277     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
278     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
279     b->setvalueslen  = N;
280   }
281   vsingle = b->setvaluescopy;
282   for (i=0; i<N; i++) {
283     vsingle[i] = v[i];
284   }
285   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
286   PetscFunctionReturn(0);
287 }
288 
289 #undef __FUNC__
290 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT"
291 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
292 {
293   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
294   int         ierr,i,N = m*n;
295   MatScalar   *vsingle;
296 
297   PetscFunctionBegin;
298   SETERRQ(1,"Function not yet written for SBAIJ format");
299   /* PetscFunctionReturn(0); */
300 }
301 
302 #undef __FUNC__
303 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT"
304 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
305 {
306   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
307   int         ierr,i,N = m*n*b->bs2;
308   MatScalar   *vsingle;
309 
310   PetscFunctionBegin;
311   SETERRQ(1,"Function not yet written for SBAIJ format");
312   /* PetscFunctionReturn(0); */
313 }
314 #endif
315 
/* Only add/insert a(i,j) with i<=j (block indices).
   Any a(i,j) with i>j input by the user is ignored.
*/
319 #undef __FUNC__
320 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
321 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
322 {
323   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
324   MatScalar   value;
325   int         ierr,i,j,row,col;
326   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
327   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
328   int         cend_orig=baij->cend_bs,bs=baij->bs;
329 
330   /* Some Variables required in the macro */
331   Mat         A = baij->A;
332   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
333   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
334   MatScalar   *aa=a->a;
335 
336   Mat         B = baij->B;
337   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
338   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
339   MatScalar   *ba=b->a;
340 
341   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
342   int         low,high,t,ridx,cidx,bs2=a->bs2;
343   MatScalar   *ap,*bap;
344 
345   /* for stash */
346   int         n_loc, *in_loc=0;
347   MatScalar   *v_loc=0;
348 
349   PetscFunctionBegin;
350 
351   if(!baij->donotstash){
352     in_loc = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(in_loc);
353     v_loc  = (MatScalar*)PetscMalloc(n*sizeof(MatScalar));CHKPTRQ(v_loc);
354   }
355 
356   for (i=0; i<m; i++) {
357     if (im[i] < 0) continue;
358 #if defined(PETSC_USE_BOPT_g)
359     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
360 #endif
361     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
362       row = im[i] - rstart_orig;              /* local row index */
363       for (j=0; j<n; j++) {
364         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
365         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
366           col = in[j] - cstart_orig;          /* local col index */
367           brow = row/bs; bcol = col/bs;
368           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
369           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
370           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
371           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
372         } else if (in[j] < 0) continue;
373 #if defined(PETSC_USE_BOPT_g)
374         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");}
375 #endif
376         else {  /* off-diag entry (B) */
377           if (mat->was_assembled) {
378             if (!baij->colmap) {
379               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
380             }
381 #if defined (PETSC_USE_CTABLE)
382             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
383             col  = col - 1 + in[j]%bs;
384 #else
385             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
386 #endif
387             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
388               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
389               col =  in[j];
390               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
391               B = baij->B;
392               b = (Mat_SeqBAIJ*)(B)->data;
393               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
394               ba=b->a;
395             }
396           } else col = in[j];
397           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
398           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
399           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
400         }
401       }
402     } else {  /* off processor entry */
403       if (!baij->donotstash) {
404         n_loc = 0;
405         for (j=0; j<n; j++){
406           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
407           in_loc[n_loc] = in[j];
408           if (roworiented) {
409             v_loc[n_loc] = v[i*n+j];
410           } else {
411             v_loc[n_loc] = v[j*m+i];
412           }
413           n_loc++;
414         }
415         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
416       }
417     }
418   }
419 
420   if(!baij->donotstash){
421     ierr = PetscFree(in_loc);CHKERRQ(ierr);
422     ierr = PetscFree(v_loc);CHKERRQ(ierr);
423   }
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNC__
428 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  /* Placeholder: blocked insertion is not yet implemented for the parallel
     SBAIJ format; always raises an error. */
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
435 
/* Multiplicative hashing: HASH_KEY is the golden-ratio conjugate; the macro
   maps key into [0,size) via the fractional part of key*HASH_KEY.  tmp must
   be a double lvalue used as scratch space. */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
440 #undef __FUNC__
441 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar"
int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  /* Placeholder: hash-table insertion is not yet implemented for the
     parallel SBAIJ format; always raises an error. */
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
448 
449 #undef __FUNC__
450 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  /* Placeholder: blocked hash-table insertion is not yet implemented for the
     parallel SBAIJ format; always raises an error. */
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
457 
458 #undef __FUNC__
459 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ"
/*
   MatGetValues_MPISBAIJ - retrieves values at global scalar indices
   (idxm x idxn) into v (row-major, v[i*n+j]).  Only rows owned by this
   process are supported; requested entries whose column is not present in
   the off-diagonal block are returned as 0.0.
*/
int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  /* ownership ranges converted from block to scalar indices */
  int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;  /* local row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column lies in the local diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate through the global->local colmap */
          if (!baij->colmap) {
            ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;  /* colmap stores local index + 1; 0 means "not present" */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* column block not stored in B => the entry is structurally zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
501 
502 #undef __FUNC__
503 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ"
504 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
505 {
506   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
507   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
508   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
509   int        ierr;
510   PetscReal  sum[2],*lnorm2;
511 
512   PetscFunctionBegin;
513   if (baij->size == 1) {
514     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
515   } else {
516     if (type == NORM_FROBENIUS) {
517       lnorm2 = (double*)PetscMalloc(2*sizeof(double));CHKPTRQ(lnorm2);
518       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
519       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
520       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
521       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
522       /*
523       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
524       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
525       */
526       ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
527       /*
528       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
529       PetscSynchronizedFlush(PETSC_COMM_WORLD); */
530 
531       *norm = sqrt(sum[0] + 2*sum[1]);
532       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
533     } else {
534       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
535     }
536   }
537   PetscFunctionReturn(0);
538 }
539 
540 /*
541   Creates the hash table, and sets the table
542   This table is created only once.
543   If new entried need to be added to the matrix
544   then the hash table has to be destroyed and
545   recreated.
546 */
547 #undef __FUNC__
548 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private"
int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
{
  /* Placeholder: hash-table creation is not yet implemented for the parallel
     SBAIJ format; always raises an error.  See MatAssemblyEnd_MPISBAIJ,
     which calls this when the ht_flag option is set. */
  PetscFunctionBegin;
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
555 
556 #undef __FUNC__
557 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ"
558 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
559 {
560   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
561   int         ierr,nstash,reallocs;
562   InsertMode  addv;
563 
564   PetscFunctionBegin;
565   if (baij->donotstash) {
566     PetscFunctionReturn(0);
567   }
568 
569   /* make sure all processors are either in INSERTMODE or ADDMODE */
570   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
571   if (addv == (ADD_VALUES|INSERT_VALUES)) {
572     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
573   }
574   mat->insertmode = addv; /* in case this processor had no cache */
575 
576   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
577   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
578   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
579   PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
580   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
581   PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
582   PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNC__
586 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ"
587 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
588 {
589   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
590   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
591   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
592   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
593   int         *row,*col,other_disassembled;
594   PetscTruth  r1,r2,r3;
595   MatScalar   *val;
596   InsertMode  addv = mat->insertmode;
597   int         rank;
598 
599   PetscFunctionBegin;
600   /* remove 2 line below later */
601   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
602 
603   if (!baij->donotstash) {
604     while (1) {
605       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
606       /*
607       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
608       PetscSynchronizedFlush(PETSC_COMM_WORLD);
609       */
610       if (!flg) break;
611 
612       for (i=0; i<n;) {
613         /* Now identify the consecutive vals belonging to the same row */
614         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
615         if (j < n) ncols = j-i;
616         else       ncols = n-i;
617         /* Now assemble all these values with a single function call */
618         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
619         i = j;
620       }
621     }
622     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
623     /* Now process the block-stash. Since the values are stashed column-oriented,
624        set the roworiented flag to column oriented, and after MatSetValues()
625        restore the original flags */
626     r1 = baij->roworiented;
627     r2 = a->roworiented;
628     r3 = b->roworiented;
629     baij->roworiented = PETSC_FALSE;
630     a->roworiented    = PETSC_FALSE;
631     b->roworiented    = PETSC_FALSE;
632     while (1) {
633       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
634       if (!flg) break;
635 
636       for (i=0; i<n;) {
637         /* Now identify the consecutive vals belonging to the same row */
638         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
639         if (j < n) ncols = j-i;
640         else       ncols = n-i;
641         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
642         i = j;
643       }
644     }
645     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
646     baij->roworiented = r1;
647     a->roworiented    = r2;
648     b->roworiented    = r3;
649   }
650 
651   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
652   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
653 
654   /* determine if any processor has disassembled, if so we must
655      also disassemble ourselfs, in order that we may reassemble. */
656   /*
657      if nonzero structure of submatrix B cannot change then we know that
658      no processor disassembled thus we can skip this stuff
659   */
660   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
661     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
662     if (mat->was_assembled && !other_disassembled) {
663       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
664     }
665   }
666 
667   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
668     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr);
669   }
670   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
671   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
672 
673 #if defined(PETSC_USE_BOPT_g)
674   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
675     PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
676     baij->ht_total_ct  = 0;
677     baij->ht_insert_ct = 0;
678   }
679 #endif
680   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
681     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
682     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
683     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
684   }
685 
686   if (baij->rowvalues) {
687     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
688     baij->rowvalues = 0;
689   }
690   PetscFunctionReturn(0);
691 }
692 
693 #undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket"
/*
   Viewer kernel shared by the ASCII, draw, socket and binary paths of
   MatView_MPISBAIJ().

   For the ASCII "info" formats only per-process statistics are printed and
   the routine returns early.  Otherwise, with more than one process, the
   whole matrix is gathered into a temporary parallel matrix whose rows all
   live on process 0, and that process views its sequential diagonal part
   through the singleton viewer.
*/
static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPISBAIJ  *baij = (Mat_MPISBAIJ*)mat->data;
  int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth   isascii,isdraw;
  Viewer       sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* nz counts from MatGetInfo are scaled by bs here -- NOTE(review):
         looks like block counts converted to point entries; confirm the
         intended scaling is bs and not bs*bs */
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    /* a null draw context means graphics are disabled; nothing to do */
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    /* uniprocessor case: the diagonal block is the whole matrix */
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ *Bloc;
    int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* process 0 owns every row of the gathered matrix; others own none */
    if (!rank) {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point-row indices of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        /* insert the bs x bs block one point column at a time */
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps compressed off-diagonal block columns to global ones */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
797 
798 #undef __FUNC__
799 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ"
800 int MatView_MPISBAIJ(Mat mat,Viewer viewer)
801 {
802   int        ierr;
803   PetscTruth isascii,isdraw,issocket,isbinary;
804 
805   PetscFunctionBegin;
806   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
807   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
808   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
809   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
810   if (isascii || isdraw || issocket || isbinary) {
811     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
812   } else {
813     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
814   }
815   PetscFunctionReturn(0);
816 }
817 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ"
/*
   Destroys a parallel SBAIJ matrix: releases the optional mappings/maps on
   the Mat header, the value stashes, the two sequential blocks (diagonal A
   and off-diagonal B), the communication scatter and all auxiliary arrays,
   then destroys the PETSc object header itself.
*/
int MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;

  /* these header members are optional; destroy only if they were created */
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
  /* colmap is a hash table or a plain array depending on the build */
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
868 
869 #undef __FUNC__
870 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ"
871 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
872 {
873   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
874   int         ierr,nt;
875 
876   PetscFunctionBegin;
877   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
878   if (nt != a->n) {
879     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
880   }
881   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
882   if (nt != a->m) {
883     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
884   }
885 
886   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
887   /* do diagonal part */
888   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
889   /* do supperdiagonal part */
890   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
891   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
892   /* do subdiagonal part */
893   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
894   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
895   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
896 
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNC__
901 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ"
902 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
903 {
904   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
905   int        ierr;
906 
907   PetscFunctionBegin;
908   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
909   /* do diagonal part */
910   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
911   /* do supperdiagonal part */
912   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
913   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
914 
915   /* do subdiagonal part */
916   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
917   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
918   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
919 
920   PetscFunctionReturn(0);
921 }
922 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ"
/*
   Transpose product is intentionally unsupported: an SBAIJ matrix is
   symmetric, so A^T x equals A x.  Always raises an error directing the
   caller to MatMult().
*/
int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMult().");
  /* PetscFunctionReturn(0); */
}
931 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ"
/*
   Transpose product-with-add is intentionally unsupported: an SBAIJ matrix
   is symmetric, so A^T x + y equals A x + y.  Always raises an error
   directing the caller to MatMultAdd().
*/
int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  PetscFunctionBegin;
  SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
  /* PetscFunctionReturn(0); */
}
940 
941 /*
942   This only works correctly for square matrices where the subblock A->A is the
943    diagonal block
944 */
945 #undef __FUNC__
946 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ"
947 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
948 {
949   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
950   int         ierr;
951 
952   PetscFunctionBegin;
953   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
954   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 
958 #undef __FUNC__
959 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ"
960 int MatScale_MPISBAIJ(Scalar *aa,Mat A)
961 {
962   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
963   int         ierr;
964 
965   PetscFunctionBegin;
966   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
967   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 #undef __FUNC__
972 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPISBAIJ"
973 int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n)
974 {
975   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
976 
977   PetscFunctionBegin;
978   if (m) *m = mat->M;
979   if (n) *n = mat->N;
980   PetscFunctionReturn(0);
981 }
982 
983 #undef __FUNC__
984 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPISBAIJ"
985 int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n)
986 {
987   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
988 
989   PetscFunctionBegin;
990   *m = mat->m; *n = mat->n;
991   PetscFunctionReturn(0);
992 }
993 
994 #undef __FUNC__
995 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ"
996 int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n)
997 {
998   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
999 
1000   PetscFunctionBegin;
1001   if (m) *m = mat->rstart*mat->bs;
1002   if (n) *n = mat->rend*mat->bs;
1003   PetscFunctionReturn(0);
1004 }
1005 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ"
/*
   Returns one global row of the parallel matrix in point (not block) index
   space, merging the pieces stored in the diagonal block A and the
   off-diagonal block B so the returned columns are globally sorted.
   Only locally-owned rows may be requested.  The idx/v outputs point into
   workspace owned by the matrix; the caller must pair this call with
   MatRestoreRow_MPISBAIJ().
*/
int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
  Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int        *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time per matrix */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    /* one allocation shared by the value and index workspaces */
    mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;  /* local row index */

  /* request from the blocks only the outputs the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global block column precedes cstart go first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested, so imark must be found here instead */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1087 
1088 #undef __FUNC__
1089 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ"
1090 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1091 {
1092   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1093 
1094   PetscFunctionBegin;
1095   if (baij->getrowactive == PETSC_FALSE) {
1096     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1097   }
1098   baij->getrowactive = PETSC_FALSE;
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNC__
1103 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ"
1104 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1105 {
1106   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1107 
1108   PetscFunctionBegin;
1109   *bs = baij->bs;
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNC__
1114 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ"
1115 int MatZeroEntries_MPISBAIJ(Mat A)
1116 {
1117   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1118   int         ierr;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1122   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNC__
1127 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ"
1128 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1129 {
1130   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1131   Mat         A = a->A,B = a->B;
1132   int         ierr;
1133   PetscReal   isend[5],irecv[5];
1134 
1135   PetscFunctionBegin;
1136   info->block_size     = (double)a->bs;
1137   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1138   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1139   isend[3] = info->memory;  isend[4] = info->mallocs;
1140   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1141   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1142   isend[3] += info->memory;  isend[4] += info->mallocs;
1143   if (flag == MAT_LOCAL) {
1144     info->nz_used      = isend[0];
1145     info->nz_allocated = isend[1];
1146     info->nz_unneeded  = isend[2];
1147     info->memory       = isend[3];
1148     info->mallocs      = isend[4];
1149   } else if (flag == MAT_GLOBAL_MAX) {
1150     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1151     info->nz_used      = irecv[0];
1152     info->nz_allocated = irecv[1];
1153     info->nz_unneeded  = irecv[2];
1154     info->memory       = irecv[3];
1155     info->mallocs      = irecv[4];
1156   } else if (flag == MAT_GLOBAL_SUM) {
1157     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1158     info->nz_used      = irecv[0];
1159     info->nz_allocated = irecv[1];
1160     info->nz_unneeded  = irecv[2];
1161     info->memory       = irecv[3];
1162     info->mallocs      = irecv[4];
1163   } else {
1164     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1165   }
1166   info->rows_global       = (double)a->M;
1167   info->columns_global    = (double)a->N;
1168   info->rows_local        = (double)a->m;
1169   info->columns_local     = (double)a->N;
1170   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1171   info->fill_ratio_needed = 0;
1172   info->factor_mallocs    = 0;
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNC__
1177 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ"
1178 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1179 {
1180   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1181   int         ierr;
1182 
1183   PetscFunctionBegin;
1184   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1185       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1186       op == MAT_COLUMNS_UNSORTED ||
1187       op == MAT_COLUMNS_SORTED ||
1188       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1189       op == MAT_KEEP_ZEROED_ROWS ||
1190       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1191         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1192         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1193   } else if (op == MAT_ROW_ORIENTED) {
1194         a->roworiented = PETSC_TRUE;
1195         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1196         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1197   } else if (op == MAT_ROWS_SORTED ||
1198              op == MAT_ROWS_UNSORTED ||
1199              op == MAT_SYMMETRIC ||
1200              op == MAT_STRUCTURALLY_SYMMETRIC ||
1201              op == MAT_YES_NEW_DIAGONALS ||
1202              op == MAT_USE_HASH_TABLE) {
1203     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1204   } else if (op == MAT_COLUMN_ORIENTED) {
1205     a->roworiented = PETSC_FALSE;
1206     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1207     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1208   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1209     a->donotstash = PETSC_TRUE;
1210   } else if (op == MAT_NO_NEW_DIAGONALS) {
1211     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1212   } else if (op == MAT_USE_HASH_TABLE) {
1213     a->ht_flag = PETSC_TRUE;
1214   } else {
1215     SETERRQ(PETSC_ERR_SUP,"unknown option");
1216   }
1217   PetscFunctionReturn(0);
1218 }
1219 
1220 #undef __FUNC__
1221 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ("
1222 int MatTranspose_MPISBAIJ(Mat A,Mat *matout)
1223 {
1224   PetscFunctionBegin;
1225   SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called");
1226   /* PetscFunctionReturn(0); */
1227 }
1228 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ"
/*
   Symmetric diagonal scaling: because only the upper triangle is stored,
   the left and right scaling vectors must be the identical vector (ll==rr
   is enforced below).  The diagonal block is scaled on both sides; the
   off-diagonal block is left-scaled locally and right-scaled with the
   scattered ghost values of rr.

   NOTE(review): the commented-out braces are remnants of separate if(ll)/
   if(rr) sections; since ll==rr is required, one if(rr) block covers both.
   If ll==rr==PETSC_NULL the body is skipped entirely and nothing is
   scaled, including the diagonal block -- confirm that no-op is intended.
*/
int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat         a = baij->A,b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  if (ll != rr) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    /*} if (ll) { */
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
    /* } */
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* if (rr) { */
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1263 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ"
/*
   Zeroes the (global, point-indexed) rows listed in is, optionally placing
   *diag on the diagonal of each zeroed row.  Any process may name any row:
   the routine first routes each requested row index to its owning process
   with a hand-rolled MPI send/receive exchange, then each process zaps the
   rows it owns in its sequential blocks.
*/
int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = 0;
    /* owners[] stores block-row ranges; scale by bs to compare point rows */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];       /* length of the longest incoming message */
  nrecvs = work[size+rank];  /* number of messages this process receives */
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the requested rows by destination process */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* the loop above advanced starts[]; rebuild it before sending */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first local point row owned by this process */
  base = owners[rank]*bs;

  /*  wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base; /* convert to local point row */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: fall back to MatSetValues per diagonal */
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1415 
1416 #undef __FUNC__
1417 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ"
1418 int MatPrintHelp_MPISBAIJ(Mat A)
1419 {
1420   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1421   MPI_Comm    comm = A->comm;
1422   static int  called = 0;
1423   int         ierr;
1424 
1425   PetscFunctionBegin;
1426   if (!a->rank) {
1427     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1428   }
1429   if (called) {PetscFunctionReturn(0);} else called = 1;
1430   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1431   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNC__
1436 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ"
1437 int MatSetUnfactored_MPISBAIJ(Mat A)
1438 {
1439   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1440   int         ierr;
1441 
1442   PetscFunctionBegin;
1443   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1448 
1449 #undef __FUNC__
1450 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ"
1451 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1452 {
1453   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1454   Mat         a,b,c,d;
1455   PetscTruth  flg;
1456   int         ierr;
1457 
1458   PetscFunctionBegin;
1459   if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1460   a = matA->A; b = matA->B;
1461   c = matB->A; d = matB->B;
1462 
1463   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1464   if (flg == PETSC_TRUE) {
1465     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1466   }
1467   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 
/* -------------------------------------------------------------------*/
/*
   Function table for the MPISBAIJ matrix type.  The initializer is
   positional: each slot corresponds to a fixed member of struct _MatOps in
   this PETSc release, and a 0 marks an operation the format does not
   implement.  NOTE(review): slot meanings are defined by the _MatOps
   layout in the petscmat headers of this version -- consult them before
   adding, removing or reordering entries.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPISBAIJ,
  MatGetRow_MPISBAIJ,
  MatRestoreRow_MPISBAIJ,
  MatMult_MPISBAIJ,
  MatMultAdd_MPISBAIJ,
  MatMultTranspose_MPISBAIJ,
  MatMultTransposeAdd_MPISBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPISBAIJ,
  MatGetInfo_MPISBAIJ,
  MatEqual_MPISBAIJ,
  MatGetDiagonal_MPISBAIJ,
  MatDiagonalScale_MPISBAIJ,
  MatNorm_MPISBAIJ,
  MatAssemblyBegin_MPISBAIJ,
  MatAssemblyEnd_MPISBAIJ,
  0,
  MatSetOption_MPISBAIJ,
  MatZeroEntries_MPISBAIJ,
  MatZeroRows_MPISBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPISBAIJ,
  MatGetLocalSize_MPISBAIJ,
  MatGetOwnershipRange_MPISBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPISBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPISBAIJ,
  MatIncreaseOverlap_MPISBAIJ,
  MatGetValues_MPISBAIJ,
  0,
  MatPrintHelp_MPISBAIJ,
  MatScale_MPISBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPISBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPISBAIJ,
  0,
  MatSetValuesBlocked_MPISBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};
1539 
1540 
1541 EXTERN_C_BEGIN
1542 #undef __FUNC__
1543 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ"
1544 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1545 {
1546   PetscFunctionBegin;
1547   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1548   *iscopy = PETSC_FALSE;
1549   PetscFunctionReturn(0);
1550 }
1551 EXTERN_C_END
1552 
1553 #undef __FUNC__
1554 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ"
1555 /*@C
1556    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1557    (block compressed row).  For good matrix assembly performance
1558    the user should preallocate the matrix storage by setting the parameters
1559    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1560    performance can be increased by more than a factor of 50.
1561 
1562    Collective on MPI_Comm
1563 
1564    Input Parameters:
1565 +  comm - MPI communicator
.  bs   - size of block
1567 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1568            This value should be the same as the local size used in creating the
1569            y vector for the matrix-vector product y = Ax.
1570 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1571            This value should be the same as the local size used in creating the
1572            x vector for the matrix-vector product y = Ax.
1573 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1574 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1575 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1576            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1580 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1581            submatrix (same for all local rows).
1582 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1583            off-diagonal portion of the local submatrix (possibly different for
1584            each block row) or PETSC_NULL.
1585 
1586    Output Parameter:
1587 .  A - the matrix
1588 
1589    Options Database Keys:
1590 .   -mat_no_unroll - uses code that does not unroll the loops in the
1591                      block calculations (much slower)
1592 .   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)
1595 
1596    Notes:
1597    The user MUST specify either the local or global matrix dimensions
1598    (possibly both).
1599 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1602 
1603    Storage Information:
1604    For a square global matrix we define each processor's diagonal portion
1605    to be its local rows and the corresponding columns (a square submatrix);
1606    each processor's off-diagonal portion encompasses the remainder of the
1607    local matrix (a rectangular submatrix).
1608 
1609    The user can specify preallocated storage for the diagonal part of
1610    the local submatrix with either d_nz or d_nnz (not both).  Set
1611    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1612    memory allocation.  Likewise, specify preallocated storage for the
1613    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1614 
1615    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1616    the figure below we depict these three local rows and all columns (0-11).
1617 
1618 .vb
1619            0 1 2 3 4 5 6 7 8 9 10 11
1620           -------------------
1621    row 3  |  o o o d d d o o o o o o
1622    row 4  |  o o o d d d o o o o o o
1623    row 5  |  o o o d d d o o o o o o
1624           -------------------
1625 .ve
1626 
1627    Thus, any entries in the d locations are stored in the d (diagonal)
1628    submatrix, and any entries in the o locations are stored in the
1629    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1630    stored simply in the MATSEQBAIJ format for compressed row storage.
1631 
1632    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1633    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1634    In general, for PDE problems in which most nonzeros are near the diagonal,
1635    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1636    or you will get TERRIBLE performance; see the users' manual chapter on
1637    matrices.
1638 
1639    Level: intermediate
1640 
1641 .keywords: matrix, block, aij, compressed row, sparse, parallel
1642 
1643 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1644 @*/
1645 
1646 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1647 {
1648   Mat          B;
1649   Mat_MPISBAIJ  *b;
1650   int          ierr,i,sum[1],work[1],mbs,Mbs=PETSC_DECIDE,size;
1651   PetscTruth   flag1,flag2,flg;
1652 
1653   PetscFunctionBegin;
1654   if (M != N || m != n){ /* N and n are not used after this */
1655     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, set M=N and m=n");
1656   }
1657   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1658 
1659   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1660   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -2: value %d",d_nz);
1661   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -2: value %d",o_nz);
1662   if (d_nnz) {
1663     for (i=0; i<m/bs; i++) {
1664       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1665     }
1666   }
1667   if (o_nnz) {
1668     for (i=0; i<m/bs; i++) {
1669       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1670     }
1671   }
1672 
1673   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1674   ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr);
1675   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1676   if (!flag1 && !flag2 && size == 1) {
1677     if (M == PETSC_DECIDE) M = m;
1678     ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr);
1679     PetscFunctionReturn(0);
1680   }
1681 
1682   *A = 0;
1683   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView);
1684   PLogObjectCreate(B);
1685   B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b);
1686   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1687   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1688 
1689   B->ops->destroy    = MatDestroy_MPISBAIJ;
1690   B->ops->view       = MatView_MPISBAIJ;
1691   B->mapping    = 0;
1692   B->factor     = 0;
1693   B->assembled  = PETSC_FALSE;
1694 
1695   B->insertmode = NOT_SET_VALUES;
1696   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1697   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
1698 
1699   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
1700     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1701   }
1702   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
1703     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"either M or m should be specified");
1704   }
1705   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1706 
1707   if (M == PETSC_DECIDE) {
1708     work[0] = m; mbs = m/bs;
1709     ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1710     M = sum[0]; Mbs = M/bs;
1711   } else { /* M is specified */
1712     Mbs = M/bs;
1713     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,"No of global rows must be divisible by blocksize");
1714     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
1715     m   = mbs*bs;
1716   }
1717 
1718   if (mbs*bs != m) {
1719     SETERRQ(PETSC_ERR_ARG_SIZ,"No of local rows/cols must be divisible by blocksize");
1720   }
1721 
1722   b->m = m; B->m = m;
1723   b->n = m; B->n = m;
1724   b->N = M; B->N = M;
1725   b->M = M;
1726   B->M = M;
1727   b->bs  = bs;
1728   b->bs2 = bs*bs;
1729   b->mbs = mbs;
1730   b->nbs = mbs;
1731   b->Mbs = Mbs;
1732   b->Nbs = Mbs;
1733 
1734   /* the information in the maps duplicates the information computed below, eventually
1735      we should remove the duplicate information that is not contained in the maps */
1736   ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr);
1737   ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr);
1738 
1739   /* build local table of row and column ownerships */
1740   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1741   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1742   b->cowners    = b->rowners + b->size + 2;
1743   b->rowners_bs = b->cowners + b->size + 2;
1744   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1745   b->rowners[0]    = 0;
1746   for (i=2; i<=b->size; i++) {
1747     b->rowners[i] += b->rowners[i-1];
1748   }
1749   for (i=0; i<=b->size; i++) {
1750     b->rowners_bs[i] = b->rowners[i]*bs;
1751   }
1752   b->rstart    = b->rowners[b->rank];
1753   b->rend      = b->rowners[b->rank+1];
1754   b->rstart_bs = b->rstart * bs;
1755   b->rend_bs   = b->rend * bs;
1756 
1757   b->cstart    = b->rstart;
1758   b->cend      = b->rend;
1759   b->cstart_bs = b->cstart * bs;
1760   b->cend_bs   = b->cend * bs;
1761 
1762 
1763   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1764   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1765   PLogObjectParent(B,b->A);
1766   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1767   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1768   PLogObjectParent(B,b->B);
1769 
1770   /* build cache for off array entries formed */
1771   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1772   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1773   b->donotstash  = PETSC_FALSE;
1774   b->colmap      = PETSC_NULL;
1775   b->garray      = PETSC_NULL;
1776   b->roworiented = PETSC_TRUE;
1777 
1778 #if defined(PEYSC_USE_MAT_SINGLE)
1779   /* stuff for MatSetValues_XXX in single precision */
1780   b->lensetvalues     = 0;
1781   b->setvaluescopy    = PETSC_NULL;
1782 #endif
1783 
1784   /* stuff used in block assembly */
1785   b->barray       = 0;
1786 
1787   /* stuff used for matrix vector multiply */
1788   b->lvec         = 0;
1789   b->Mvctx        = 0;
1790 
1791   /* stuff for MatGetRow() */
1792   b->rowindices   = 0;
1793   b->rowvalues    = 0;
1794   b->getrowactive = PETSC_FALSE;
1795 
1796   /* hash table stuff */
1797   b->ht           = 0;
1798   b->hd           = 0;
1799   b->ht_size      = 0;
1800   b->ht_flag      = PETSC_FALSE;
1801   b->ht_fact      = 0;
1802   b->ht_total_ct  = 0;
1803   b->ht_insert_ct = 0;
1804 
1805   *A = B;
1806   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1807   if (flg) {
1808     double fact = 1.39;
1809     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1810     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1811     if (fact <= 1.0) fact = 1.39;
1812     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1813     PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1814   }
1815   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1816                                      "MatStoreValues_MPISBAIJ",
1817                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1818   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1819                                      "MatRetrieveValues_MPISBAIJ",
1820                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1821   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1822                                      "MatGetDiagonalBlock_MPISBAIJ",
1823                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 
1828 #undef __FUNC__
1829 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ"
1830 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1831 {
1832   Mat         mat;
1833   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1834   int         ierr,len=0;
1835   PetscTruth  flg;
1836 
1837   PetscFunctionBegin;
1838   *newmat       = 0;
1839   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView);
1840   PLogObjectCreate(mat);
1841   mat->data         = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a);
1842   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1843   mat->ops->destroy = MatDestroy_MPISBAIJ;
1844   mat->ops->view    = MatView_MPISBAIJ;
1845   mat->factor       = matin->factor;
1846   mat->assembled    = PETSC_TRUE;
1847   mat->insertmode   = NOT_SET_VALUES;
1848 
1849   a->m = mat->m   = oldmat->m;
1850   a->n = mat->n   = oldmat->n;
1851   a->M = mat->M   = oldmat->M;
1852   a->N = mat->N   = oldmat->N;
1853 
1854   a->bs  = oldmat->bs;
1855   a->bs2 = oldmat->bs2;
1856   a->mbs = oldmat->mbs;
1857   a->nbs = oldmat->nbs;
1858   a->Mbs = oldmat->Mbs;
1859   a->Nbs = oldmat->Nbs;
1860 
1861   a->rstart       = oldmat->rstart;
1862   a->rend         = oldmat->rend;
1863   a->cstart       = oldmat->cstart;
1864   a->cend         = oldmat->cend;
1865   a->size         = oldmat->size;
1866   a->rank         = oldmat->rank;
1867   a->donotstash   = oldmat->donotstash;
1868   a->roworiented  = oldmat->roworiented;
1869   a->rowindices   = 0;
1870   a->rowvalues    = 0;
1871   a->getrowactive = PETSC_FALSE;
1872   a->barray       = 0;
1873   a->rstart_bs    = oldmat->rstart_bs;
1874   a->rend_bs      = oldmat->rend_bs;
1875   a->cstart_bs    = oldmat->cstart_bs;
1876   a->cend_bs      = oldmat->cend_bs;
1877 
1878   /* hash table stuff */
1879   a->ht           = 0;
1880   a->hd           = 0;
1881   a->ht_size      = 0;
1882   a->ht_flag      = oldmat->ht_flag;
1883   a->ht_fact      = oldmat->ht_fact;
1884   a->ht_total_ct  = 0;
1885   a->ht_insert_ct = 0;
1886 
1887 
1888   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1889   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1890   a->cowners    = a->rowners + a->size + 2;
1891   a->rowners_bs = a->cowners + a->size + 2;
1892   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1893   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1894   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
1895   if (oldmat->colmap) {
1896 #if defined (PETSC_USE_CTABLE)
1897   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1898 #else
1899     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
1900     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
1901     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
1902 #endif
1903   } else a->colmap = 0;
1904   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1905     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
1906     PLogObjectMemory(mat,len*sizeof(int));
1907     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
1908   } else a->garray = 0;
1909 
1910   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1911   PLogObjectParent(mat,a->lvec);
1912   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1913 
1914   PLogObjectParent(mat,a->Mvctx);
1915   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1916   PLogObjectParent(mat,a->A);
1917   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1918   PLogObjectParent(mat,a->B);
1919   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1920   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
1921   if (flg) {
1922     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1923   }
1924   *newmat = mat;
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 #include "petscsys.h"
1929 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ"
/*
   MatLoad_MPISBAIJ - Loads a matrix from a PETSc binary viewer and assembles
   it as a parallel MPISBAIJ matrix in *newmat.

   Process 0 reads the header, row lengths, column indices and numerical
   values and ships each process its portion over MPI; every process then
   computes its block-row ownership and per-row diagonal/off-diagonal
   preallocation before inserting its own rows.  The row count is padded
   with identity rows so it becomes divisible by the block size.
*/
int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i,nz,ierr,j,rstart,rend,fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
  int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  /* block size defaults to 1 unless set on the command line */
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only process 0 touches the file; header is validated then broadcast */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows (in units of block rows) */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs * bs;
  /* rowners: block-row offsets per process; browners: same in scalar rows */
  rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    /* padded rows each carry a single (diagonal) entry */
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
    mycols = ibuf;
    /* uniprocessor case: the padded rows are not on disk, synthesize them */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  /* mask marks block columns already counted for the current block row */
  mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
  ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          /* symmetric layout: block-column ownership equals block-row ownership */
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat);
  CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    /* padded rows get 1.0 on the diagonal */
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1){
      /* NOTE(review): i equals size-1 here (loop above ran to completion),
         so procsnz[i] is the last proc's count — confirm if the loop changes */
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);

  } else {
    /* receive numeric values */
    buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2161 
2162 #undef __FUNC__
2163 #define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor"
2164 /*@
2165    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2166 
2167    Input Parameters:
2168 .  mat  - the matrix
2169 .  fact - factor
2170 
2171    Collective on Mat
2172 
2173    Level: advanced
2174 
2175   Notes:
2176    This can also be set by the command line option: -mat_use_hash_table fact
2177 
2178 .keywords: matrix, hashtable, factor, HT
2179 
2180 .seealso: MatSetOption()
2181 @*/
int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscFunctionBegin;
  /* Unimplemented stub: hash-table assembly is not supported for the SBAIJ
     format yet, so this always raises an error.  MatCreateMPISBAIJ()
     currently routes -mat_use_hash_table through the BAIJ setter instead. */
  SETERRQ(1,"Function not yet written for SBAIJ format");
  /* PetscFunctionReturn(0); */
}
2188