xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision f102d2d3ba4052999c8b59e4b1ab65578f0fd3c9)
1 /*$Id: mpisbaij.c,v 1.16 2000/08/30 20:48:25 hzhang Exp hzhang $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int DisAssemble_MPISBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqSBAIJ(Mat);
18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*);
19 
20 /*  UGLY, ugly, ugly
21    When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
22    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
23    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
24    converts the entries into single precision and then calls ..._MatScalar() to put them
25    into the single precision data structures.
26 */
27 #if defined(PETSC_USE_MAT_SINGLE)
28 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
32 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
33 #else
34 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
35 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
36 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
37 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
38 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
39 #endif
40 
41 EXTERN_C_BEGIN
42 #undef __FUNC__
43 #define __FUNC__ /*<a name="MatStoreValues_MPISBAIJ"></a>*/"MatStoreValues_MPISBAIJ"
44 int MatStoreValues_MPISBAIJ(Mat mat)
45 {
46   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
47   int         ierr;
48 
49   PetscFunctionBegin;
50   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
51   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
52   PetscFunctionReturn(0);
53 }
54 EXTERN_C_END
55 
56 EXTERN_C_BEGIN
57 #undef __FUNC__
58 #define __FUNC__ /*<a name="MatRetrieveValues_MPISBAIJ"></a>*/"MatRetrieveValues_MPISBAIJ"
59 int MatRetrieveValues_MPISBAIJ(Mat mat)
60 {
61   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
62   int         ierr;
63 
64   PetscFunctionBegin;
65   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
66   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
67   PetscFunctionReturn(0);
68 }
69 EXTERN_C_END
70 
71 /*
72      Local utility routine that creates a mapping from the global column
73    number to the local number in the off-diagonal part of the local
74    storage of the matrix.  This is done in a non scable way since the
75    length of colmap equals the global matrix length.
76 */
77 #undef __FUNC__
78 #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private"
79 static int CreateColmap_MPISBAIJ_Private(Mat mat)
80 {
81   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
82   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
83   int         nbs = B->nbs,i,bs=B->bs,ierr;
84 
85   PetscFunctionBegin;
86   SETERRQ(1,1,"Function not yet written for SBAIJ format");
87   PetscFunctionReturn(0);
88 }
89 
90 #define CHUNKSIZE  10
91 
92 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
93 { \
94  \
95     brow = row/bs;  \
96     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
97     rmax = aimax[brow]; nrow = ailen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto a_noinsert; \
113         } \
114       } \
115       if (a->nonew == 1) goto a_noinsert; \
116       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
117       if (nrow >= rmax) { \
118         /* there is no extra room in row, therefore enlarge */ \
119         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
120         MatScalar *new_a; \
121  \
122         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
123  \
124         /* malloc new storage space */ \
125         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
126         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
127         new_j   = (int*)(new_a + bs2*new_nz); \
128         new_i   = new_j + new_nz; \
129  \
130         /* copy over old data into new slots */ \
131         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
132         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
133         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
134         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
135         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
136         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
137         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
138         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
139                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
140         /* free up old matrix storage */ \
141         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
142         if (!a->singlemalloc) { \
143           ierr = PetscFree(a->i);CHKERRQ(ierr); \
144           ierr = PetscFree(a->j);CHKERRQ(ierr);\
145         } \
146         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
147         a->singlemalloc = PETSC_TRUE; \
148  \
149         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
150         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
151         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
152         a->s_maxnz += bs2*CHUNKSIZE; \
153         a->reallocs++; \
154         a->s_nz++; \
155       } \
156       N = nrow++ - 1;  \
157       /* shift up all the later entries in this row */ \
158       for (ii=N; ii>=_i; ii--) { \
159         rp[ii+1] = rp[ii]; \
160         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
161       } \
162       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
163       rp[_i]                      = bcol;  \
164       ap[bs2*_i + bs*cidx + ridx] = value;  \
165       a_noinsert:; \
166     ailen[brow] = nrow; \
167 }
168 #ifndef MatSetValues_SeqBAIJ_B_Private
169 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
170 { \
171     brow = row/bs;  \
172     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
173     rmax = bimax[brow]; nrow = bilen[brow]; \
174       bcol = col/bs; \
175       ridx = row % bs; cidx = col % bs; \
176       low = 0; high = nrow; \
177       while (high-low > 3) { \
178         t = (low+high)/2; \
179         if (rp[t] > bcol) high = t; \
180         else              low  = t; \
181       } \
182       for (_i=low; _i<high; _i++) { \
183         if (rp[_i] > bcol) break; \
184         if (rp[_i] == bcol) { \
185           bap  = ap +  bs2*_i + bs*cidx + ridx; \
186           if (addv == ADD_VALUES) *bap += value;  \
187           else                    *bap  = value;  \
188           goto b_noinsert; \
189         } \
190       } \
191       if (b->nonew == 1) goto b_noinsert; \
192       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
193       if (nrow >= rmax) { \
194         /* there is no extra room in row, therefore enlarge */ \
195         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
196         MatScalar *new_a; \
197  \
198         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
199  \
200         /* malloc new storage space */ \
201         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
202         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
203         new_j   = (int*)(new_a + bs2*new_nz); \
204         new_i   = new_j + new_nz; \
205  \
206         /* copy over old data into new slots */ \
207         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
208         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
209         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
210         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
211         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
212         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
213         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
214         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
215                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
216         /* free up old matrix storage */ \
217         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
218         if (!b->singlemalloc) { \
219           ierr = PetscFree(b->i);CHKERRQ(ierr); \
220           ierr = PetscFree(b->j);CHKERRQ(ierr); \
221         } \
222         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
223         b->singlemalloc = PETSC_TRUE; \
224  \
225         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
226         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
227         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
228         b->maxnz += bs2*CHUNKSIZE; \
229         b->reallocs++; \
230         b->nz++; \
231       } \
232       N = nrow++ - 1;  \
233       /* shift up all the later entries in this row */ \
234       for (ii=N; ii>=_i; ii--) { \
235         rp[ii+1] = rp[ii]; \
236         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
237       } \
238       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
239       rp[_i]                      = bcol;  \
240       ap[bs2*_i + bs*cidx + ridx] = value;  \
241       b_noinsert:; \
242     bilen[brow] = nrow; \
243 }
244 #endif
245 
246 #if defined(PETSC_USE_MAT_SINGLE)
247 #undef __FUNC__
248 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ"
249 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
250 {
251   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
252   int         ierr,i,N = m*n;
253   MatScalar   *vsingle;
254 
255   PetscFunctionBegin;
256   if (N > b->setvalueslen) {
257     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
258     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
259     b->setvalueslen  = N;
260   }
261   vsingle = b->setvaluescopy;
262 
263   for (i=0; i<N; i++) {
264     vsingle[i] = v[i];
265   }
266   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNC__
271 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
272 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
273 {
274   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
275   int         ierr,i,N = m*n*b->bs2;
276   MatScalar   *vsingle;
277 
278   PetscFunctionBegin;
279   if (N > b->setvalueslen) {
280     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
281     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
282     b->setvalueslen  = N;
283   }
284   vsingle = b->setvaluescopy;
285   for (i=0; i<N; i++) {
286     vsingle[i] = v[i];
287   }
288   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNC__
293 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT"
294 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
295 {
296   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
297   int         ierr,i,N = m*n;
298   MatScalar   *vsingle;
299 
300   PetscFunctionBegin;
301   SETERRQ(1,1,"Function not yet written for SBAIJ format");
302   PetscFunctionReturn(0);
303 }
304 
305 #undef __FUNC__
306 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT"
307 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
308 {
309   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
310   int         ierr,i,N = m*n*b->bs2;
311   MatScalar   *vsingle;
312 
313   PetscFunctionBegin;
314   SETERRQ(1,1,"Function not yet written for SBAIJ format");
315   PetscFunctionReturn(0);
316 }
317 #endif
318 
319 /* Only add/insert a(i,j) with i<=j (blocks).
320    Any a(i,j) with i>j input by user is ingored.
321 */
322 #undef __FUNC__
323 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
324 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
325 {
326   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
327   MatScalar   value;
328   int         ierr,i,j,row,col;
329   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
330   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
331   int         cend_orig=baij->cend_bs,bs=baij->bs;
332 
333   /* Some Variables required in the macro */
334   Mat         A = baij->A;
335   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
336   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
337   MatScalar   *aa=a->a;
338 
339   Mat         B = baij->B;
340   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
341   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
342   MatScalar   *ba=b->a;
343 
344   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
345   int         low,high,t,ridx,cidx,bs2=a->bs2;
346   MatScalar   *ap,*bap;
347 
348   /* for stash */
349   int         n_loc, *in_loc=0;
350   MatScalar   *v_loc=0;
351 
352   PetscFunctionBegin;
353 
354   if(!baij->donotstash){
355     in_loc = (int*)PetscMalloc(n*sizeof(int));CHKPTRQ(in_loc);
356     v_loc  = (MatScalar*)PetscMalloc(n*sizeof(MatScalar));CHKPTRQ(v_loc);
357   }
358 
359   for (i=0; i<m; i++) {
360     if (im[i] < 0) continue;
361 #if defined(PETSC_USE_BOPT_g)
362     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
363 #endif
364     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
365       row = im[i] - rstart_orig;              /* local row index */
366       for (j=0; j<n; j++) {
367         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
368         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
369           col = in[j] - cstart_orig;          /* local col index */
370           brow = row/bs; bcol = col/bs;
371           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
372           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
373           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
374           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
375         } else if (in[j] < 0) continue;
376 #if defined(PETSC_USE_BOPT_g)
377         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
378 #endif
379         else {  /* off-diag entry (B) */
380           if (mat->was_assembled) {
381             if (!baij->colmap) {
382               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
383             }
384 #if defined (PETSC_USE_CTABLE)
385             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
386             col  = col - 1 + in[j]%bs;
387 #else
388             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
389 #endif
390             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
391               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
392               col =  in[j];
393               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
394               B = baij->B;
395               b = (Mat_SeqBAIJ*)(B)->data;
396               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
397               ba=b->a;
398             }
399           } else col = in[j];
400           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
401           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
402           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
403         }
404       }
405     } else {  /* off processor entry */
406       if (!baij->donotstash) {
407         n_loc = 0;
408         for (j=0; j<n; j++){
409           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
410           in_loc[n_loc] = in[j];
411           if (roworiented) {
412             v_loc[n_loc] = v[i*n+j];
413           } else {
414             v_loc[n_loc] = v[j*m+i];
415           }
416           n_loc++;
417         }
418         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
419       }
420     }
421   }
422 
423   if(!baij->donotstash){
424     ierr = PetscFree(in_loc);CHKERRQ(ierr);
425     ierr = PetscFree(v_loc);CHKERRQ(ierr);
426   }
427   PetscFunctionReturn(0);
428 }
429 
430 #undef __FUNC__
431 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
432 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
433 {
434   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
435   MatScalar   *value,*barray=baij->barray;
436   int         ierr,i,j,ii,jj,row,col;
437   int         roworiented = baij->roworiented,rstart=baij->rstart ;
438   int         rend=baij->rend,cstart=baij->cstart,stepval;
439   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
440 
441   PetscFunctionBegin;
442   SETERRQ(1,1,"Function not yet written for SBAIJ format");
443   PetscFunctionReturn(0);
444 }
445 
446 #define HASH_KEY 0.6180339887
447 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
448 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
449 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
450 #undef __FUNC__
451 #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar"
452 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
453 {
454   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
455   int         ierr,i,j,row,col;
456   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
457   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
458   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
459   PetscReal   tmp;
460   MatScalar   **HD = baij->hd,value;
461 #if defined(PETSC_USE_BOPT_g)
462   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
463 #endif
464 
465   PetscFunctionBegin;
466   SETERRQ(1,1,"Function not yet written for SBAIJ format");
467   PetscFunctionReturn(0);
468 }
469 
470 #undef __FUNC__
471 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
472 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
473 {
474   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
475   int         ierr,i,j,ii,jj,row,col;
476   int         roworiented = baij->roworiented,rstart=baij->rstart ;
477   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
478   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
479   PetscReal   tmp;
480   MatScalar   **HD = baij->hd,*baij_a;
481   MatScalar   *v_t,*value;
482 #if defined(PETSC_USE_BOPT_g)
483   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
484 #endif
485 
486   PetscFunctionBegin;
487   SETERRQ(1,1,"Function not yet written for SBAIJ format");
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNC__
492 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ"
493 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
494 {
495   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
496   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
497   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
498 
499   PetscFunctionBegin;
500   for (i=0; i<m; i++) {
501     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
502     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
503     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
504       row = idxm[i] - bsrstart;
505       for (j=0; j<n; j++) {
506         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
507         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
508         if (idxn[j] >= bscstart && idxn[j] < bscend){
509           col = idxn[j] - bscstart;
510           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
511         } else {
512           if (!baij->colmap) {
513             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
514           }
515 #if defined (PETSC_USE_CTABLE)
516           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
517           data --;
518 #else
519           data = baij->colmap[idxn[j]/bs]-1;
520 #endif
521           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
522           else {
523             col  = data + idxn[j]%bs;
524             ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
525           }
526         }
527       }
528     } else {
529       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
530     }
531   }
532  PetscFunctionReturn(0);
533 }
534 
535 #undef __FUNC__
536 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ"
537 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
538 {
539   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
540   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
541   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
542   int        ierr;
543   PetscReal  sum[2],*lnorm2;
544 
545   PetscFunctionBegin;
546   if (baij->size == 1) {
547     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
548   } else {
549     if (type == NORM_FROBENIUS) {
550       lnorm2 = (double*)PetscMalloc(2*sizeof(double));CHKPTRQ(lnorm2);
551       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
552       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
553       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
554       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
555       /*
556       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
557       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
558       */
559       ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
560       /*
561       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
562       PetscSynchronizedFlush(PETSC_COMM_WORLD); */
563 
564       *norm = sqrt(sum[0] + 2*sum[1]);
565       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
566     } else {
567       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
568     }
569   }
570   PetscFunctionReturn(0);
571 }
572 
573 /*
574   Creates the hash table, and sets the table
575   This table is created only once.
576   If new entried need to be added to the matrix
577   then the hash table has to be destroyed and
578   recreated.
579 */
580 #undef __FUNC__
581 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private"
582 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
583 {
584   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
585   Mat         A = baij->A,B=baij->B;
586   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
587   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
588   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
589   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
590   int         *HT,key;
591   MatScalar   **HD;
592   PetscReal   tmp;
593 #if defined(PETSC_USE_BOPT_g)
594   int         ct=0,max=0;
595 #endif
596 
597   PetscFunctionBegin;
598   SETERRQ(1,1,"Function not yet written for SBAIJ format");
599   PetscFunctionReturn(0);
600 }
601 
602 #undef __FUNC__
603 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ"
604 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
605 {
606   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
607   int         ierr,nstash,reallocs;
608   InsertMode  addv;
609 
610   PetscFunctionBegin;
611   if (baij->donotstash) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processors are either in INSERTMODE or ADDMODE */
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) {
618     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
619   }
620   mat->insertmode = addv; /* in case this processor had no cache */
621 
622   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
623   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
624   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
625   PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
626   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
627   PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNC__
632 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ"
633 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
634 {
635   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
636   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
637   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
638   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
639   int         *row,*col,other_disassembled;
640   PetscTruth  r1,r2,r3;
641   MatScalar   *val;
642   InsertMode  addv = mat->insertmode;
643   int         rank;
644 
645   PetscFunctionBegin;
646   /* remove 2 line below later */
647   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
648 
649   if (!baij->donotstash) {
650     while (1) {
651       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
652       /*
653       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
654       PetscSynchronizedFlush(PETSC_COMM_WORLD);
655       */
656       if (!flg) break;
657 
658       for (i=0; i<n;) {
659         /* Now identify the consecutive vals belonging to the same row */
660         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
661         if (j < n) ncols = j-i;
662         else       ncols = n-i;
663         /* Now assemble all these values with a single function call */
664         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
665         i = j;
666       }
667     }
668     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
669     /* Now process the block-stash. Since the values are stashed column-oriented,
670        set the roworiented flag to column oriented, and after MatSetValues()
671        restore the original flags */
672     r1 = baij->roworiented;
673     r2 = a->roworiented;
674     r3 = b->roworiented;
675     baij->roworiented = PETSC_FALSE;
676     a->roworiented    = PETSC_FALSE;
677     b->roworiented    = PETSC_FALSE;
678     while (1) {
679       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
680       if (!flg) break;
681 
682       for (i=0; i<n;) {
683         /* Now identify the consecutive vals belonging to the same row */
684         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
685         if (j < n) ncols = j-i;
686         else       ncols = n-i;
687         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
688         i = j;
689       }
690     }
691     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
692     baij->roworiented = r1;
693     a->roworiented    = r2;
694     b->roworiented    = r3;
695   }
696 
697   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
698   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
699 
700   /* determine if any processor has disassembled, if so we must
701      also disassemble ourselfs, in order that we may reassemble. */
702   /*
703      if nonzero structure of submatrix B cannot change then we know that
704      no processor disassembled thus we can skip this stuff
705   */
706   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
707     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
708     if (mat->was_assembled && !other_disassembled) {
709       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
710     }
711   }
712 
713   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
714     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr);
715   }
716   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
717   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
718 
719 #if defined(PETSC_USE_BOPT_g)
720   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
721     PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
722     baij->ht_total_ct  = 0;
723     baij->ht_insert_ct = 0;
724   }
725 #endif
726   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
727     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
728     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
729     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
730   }
731 
732   if (baij->rowvalues) {
733     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
734     baij->rowvalues = 0;
735   }
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNC__
740 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket"
741 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
742 {
743   Mat_MPISBAIJ  *baij = (Mat_MPISBAIJ*)mat->data;
744   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
745   PetscTruth   isascii,isdraw;
746   Viewer       sviewer;
747 
748   PetscFunctionBegin;
749   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
750   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
751   if (isascii) {
752     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
753     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
754       MatInfo info;
755       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
757       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
758               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
759               baij->bs,(int)info.memory);CHKERRQ(ierr);
760       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
761       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
762       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
763       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
764       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
765       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
766       PetscFunctionReturn(0);
767     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
768       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
769       PetscFunctionReturn(0);
770     }
771   }
772 
773   if (isdraw) {
774     Draw       draw;
775     PetscTruth isnull;
776     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
777     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
778   }
779 
780   if (size == 1) {
781     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
782   } else {
783     /* assemble the entire matrix onto first processor. */
784     Mat         A;
785     Mat_SeqSBAIJ *Aloc;
786     Mat_SeqBAIJ *Bloc;
787     int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
788     MatScalar   *a;
789 
790     if (!rank) {
791       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
792     } else {
793       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
794     }
795     PLogObjectParent(mat,A);
796 
797     /* copy over the A part */
798     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
799     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
800     rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
801 
802     for (i=0; i<mbs; i++) {
803       rvals[0] = bs*(baij->rstart + i);
804       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
805       for (j=ai[i]; j<ai[i+1]; j++) {
806         col = (baij->cstart+aj[j])*bs;
807         for (k=0; k<bs; k++) {
808           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
809           col++; a += bs;
810         }
811       }
812     }
813     /* copy over the B part */
814     Bloc = (Mat_SeqBAIJ*)baij->B->data;
815     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
816     for (i=0; i<mbs; i++) {
817       rvals[0] = bs*(baij->rstart + i);
818       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
819       for (j=ai[i]; j<ai[i+1]; j++) {
820         col = baij->garray[aj[j]]*bs;
821         for (k=0; k<bs; k++) {
822           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
823           col++; a += bs;
824         }
825       }
826     }
827     ierr = PetscFree(rvals);CHKERRQ(ierr);
828     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
829     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
830     /*
831        Everyone has to call to draw the matrix since the graphics waits are
832        synchronized across all processors that share the Draw object
833     */
834     ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
835     if (!rank) {
836       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
837     }
838     ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
839     ierr = MatDestroy(A);CHKERRQ(ierr);
840   }
841   PetscFunctionReturn(0);
842 }
843 
844 #undef __FUNC__
845 #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ"
846 int MatView_MPISBAIJ(Mat mat,Viewer viewer)
847 {
848   int        ierr;
849   PetscTruth isascii,isdraw,issocket,isbinary;
850 
851   PetscFunctionBegin;
852   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
853   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
854   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
855   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
856   if (isascii || isdraw || issocket || isbinary) {
857     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
858   } else {
859     SETERRQ1(1,1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
860   }
861   PetscFunctionReturn(0);
862 }
863 
864 #undef __FUNC__
865 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ"
866 int MatDestroy_MPISBAIJ(Mat mat)
867 {
868   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
869   int         ierr;
870 
871   PetscFunctionBegin;
872 
873   if (mat->mapping) {
874     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
875   }
876   if (mat->bmapping) {
877     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
878   }
879   if (mat->rmap) {
880     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
881   }
882   if (mat->cmap) {
883     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
884   }
885 #if defined(PETSC_USE_LOG)
886   PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
887 #endif
888 
889   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
890   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
891 
892   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
893   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
894   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
895 #if defined (PETSC_USE_CTABLE)
896   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
897 #else
898   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
899 #endif
900   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
901   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
902   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
903   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
904   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
905   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
906 #if defined(PETSC_USE_MAT_SINGLE)
907   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
908 #endif
909   ierr = PetscFree(baij);CHKERRQ(ierr);
910   PLogObjectDestroy(mat);
911   PetscHeaderDestroy(mat);
912   PetscFunctionReturn(0);
913 }
914 
915 #undef __FUNC__
916 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ"
917 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
918 {
919   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
920   int         ierr,nt;
921 
922   PetscFunctionBegin;
923   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
924   if (nt != a->n) {
925     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
926   }
927   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
928   if (nt != a->m) {
929     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
930   }
931 
932   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
933   /* do diagonal part */
934   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
935   /* do supperdiagonal part */
936   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
937   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
938   /* do subdiagonal part */
939   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
940   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
941   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
942 
943   PetscFunctionReturn(0);
944 }
945 
946 #undef __FUNC__
947 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ"
948 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
949 {
950   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
951   int        ierr;
952 
953   PetscFunctionBegin;
954   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
955   /* do diagonal part */
956   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
957   /* do supperdiagonal part */
958   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
959   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
960 
961   /* do subdiagonal part */
962   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
963   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
964   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
965 
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNC__
970 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ"
971 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
972 {
973   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
974   int         ierr;
975 
976   PetscFunctionBegin;
977   SETERRQ(1,1,"Matrix is symmetric. Call MatMult().");
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNC__
982 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ"
983 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
986   int         ierr;
987 
988   PetscFunctionBegin;
989   SETERRQ(1,1,"Matrix is symmetric. Call MatMultAdd().");
990   PetscFunctionReturn(0);
991 }
992 
993 /*
994   This only works correctly for square matrices where the subblock A->A is the
995    diagonal block
996 */
997 #undef __FUNC__
998 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ"
999 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1000 {
1001   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1002   int         ierr;
1003 
1004   PetscFunctionBegin;
1005   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); */
1006   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 #undef __FUNC__
1011 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ"
1012 int MatScale_MPISBAIJ(Scalar *aa,Mat A)
1013 {
1014   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1015   int         ierr;
1016 
1017   PetscFunctionBegin;
1018   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1019   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 #undef __FUNC__
1024 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPISBAIJ"
1025 int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n)
1026 {
1027   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1028 
1029   PetscFunctionBegin;
1030   if (m) *m = mat->M;
1031   if (n) *n = mat->N;
1032   PetscFunctionReturn(0);
1033 }
1034 
1035 #undef __FUNC__
1036 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPISBAIJ"
1037 int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n)
1038 {
1039   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1040 
1041   PetscFunctionBegin;
1042   *m = mat->m; *n = mat->n;
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 #undef __FUNC__
1047 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ"
1048 int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n)
1049 {
1050   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1051 
1052   PetscFunctionBegin;
1053   if (m) *m = mat->rstart*mat->bs;
1054   if (n) *n = mat->rend*mat->bs;
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNC__
1059 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ"
1060 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1061 {
1062   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1063   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1064   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1065   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1066   int        *cmap,*idx_p,cstart = mat->cstart;
1067 
1068   PetscFunctionBegin;
1069   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1070   mat->getrowactive = PETSC_TRUE;
1071 
1072   if (!mat->rowvalues && (idx || v)) {
1073     /*
1074         allocate enough space to hold information from the longest row.
1075     */
1076     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1077     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1078     int     max = 1,mbs = mat->mbs,tmp;
1079     for (i=0; i<mbs; i++) {
1080       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1081       if (max < tmp) { max = tmp; }
1082     }
1083     mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1084     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1085   }
1086 
1087   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1088   lrow = row - brstart;  /* local row index */
1089 
1090   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1091   if (!v)   {pvA = 0; pvB = 0;}
1092   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1093   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1094   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1095   nztot = nzA + nzB;
1096 
1097   cmap  = mat->garray;
1098   if (v  || idx) {
1099     if (nztot) {
1100       /* Sort by increasing column numbers, assuming A and B already sorted */
1101       int imark = -1;
1102       if (v) {
1103         *v = v_p = mat->rowvalues;
1104         for (i=0; i<nzB; i++) {
1105           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1106           else break;
1107         }
1108         imark = i;
1109         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1110         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1111       }
1112       if (idx) {
1113         *idx = idx_p = mat->rowindices;
1114         if (imark > -1) {
1115           for (i=0; i<imark; i++) {
1116             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1117           }
1118         } else {
1119           for (i=0; i<nzB; i++) {
1120             if (cmap[cworkB[i]/bs] < cstart)
1121               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1122             else break;
1123           }
1124           imark = i;
1125         }
1126         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1127         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1128       }
1129     } else {
1130       if (idx) *idx = 0;
1131       if (v)   *v   = 0;
1132     }
1133   }
1134   *nz = nztot;
1135   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1136   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNC__
1141 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ"
1142 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1143 {
1144   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1145 
1146   PetscFunctionBegin;
1147   if (baij->getrowactive == PETSC_FALSE) {
1148     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1149   }
1150   baij->getrowactive = PETSC_FALSE;
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNC__
1155 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ"
1156 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1157 {
1158   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1159 
1160   PetscFunctionBegin;
1161   *bs = baij->bs;
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 #undef __FUNC__
1166 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ"
1167 int MatZeroEntries_MPISBAIJ(Mat A)
1168 {
1169   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1170   int         ierr;
1171 
1172   PetscFunctionBegin;
1173   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1174   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNC__
1179 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ"
1180 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1181 {
1182   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1183   Mat         A = a->A,B = a->B;
1184   int         ierr;
1185   PetscReal   isend[5],irecv[5];
1186 
1187   PetscFunctionBegin;
1188   info->block_size     = (double)a->bs;
1189   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1190   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1191   isend[3] = info->memory;  isend[4] = info->mallocs;
1192   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1193   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1194   isend[3] += info->memory;  isend[4] += info->mallocs;
1195   if (flag == MAT_LOCAL) {
1196     info->nz_used      = isend[0];
1197     info->nz_allocated = isend[1];
1198     info->nz_unneeded  = isend[2];
1199     info->memory       = isend[3];
1200     info->mallocs      = isend[4];
1201   } else if (flag == MAT_GLOBAL_MAX) {
1202     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1203     info->nz_used      = irecv[0];
1204     info->nz_allocated = irecv[1];
1205     info->nz_unneeded  = irecv[2];
1206     info->memory       = irecv[3];
1207     info->mallocs      = irecv[4];
1208   } else if (flag == MAT_GLOBAL_SUM) {
1209     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1210     info->nz_used      = irecv[0];
1211     info->nz_allocated = irecv[1];
1212     info->nz_unneeded  = irecv[2];
1213     info->memory       = irecv[3];
1214     info->mallocs      = irecv[4];
1215   } else {
1216     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1217   }
1218   info->rows_global       = (double)a->M;
1219   info->columns_global    = (double)a->N;
1220   info->rows_local        = (double)a->m;
1221   info->columns_local     = (double)a->N;
1222   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1223   info->fill_ratio_needed = 0;
1224   info->factor_mallocs    = 0;
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 #undef __FUNC__
1229 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ"
1230 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1231 {
1232   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1233   int         ierr;
1234 
1235   PetscFunctionBegin;
1236   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1237       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1238       op == MAT_COLUMNS_UNSORTED ||
1239       op == MAT_COLUMNS_SORTED ||
1240       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1241       op == MAT_KEEP_ZEROED_ROWS ||
1242       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1243         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1244         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1245   } else if (op == MAT_ROW_ORIENTED) {
1246         a->roworiented = PETSC_TRUE;
1247         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1248         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1249   } else if (op == MAT_ROWS_SORTED ||
1250              op == MAT_ROWS_UNSORTED ||
1251              op == MAT_SYMMETRIC ||
1252              op == MAT_STRUCTURALLY_SYMMETRIC ||
1253              op == MAT_YES_NEW_DIAGONALS ||
1254              op == MAT_USE_HASH_TABLE) {
1255     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1256   } else if (op == MAT_COLUMN_ORIENTED) {
1257     a->roworiented = PETSC_FALSE;
1258     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1259     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1260   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1261     a->donotstash = PETSC_TRUE;
1262   } else if (op == MAT_NO_NEW_DIAGONALS) {
1263     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1264   } else if (op == MAT_USE_HASH_TABLE) {
1265     a->ht_flag = PETSC_TRUE;
1266   } else {
1267     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1268   }
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 #undef __FUNC__
1273 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ("
1274 int MatTranspose_MPISBAIJ(Mat A,Mat *matout)
1275 {
1276   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1277   Mat_SeqBAIJ *Aloc;
1278   Mat         B;
1279   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1280   int         bs=baij->bs,mbs=baij->mbs;
1281   MatScalar   *a;
1282 
1283   PetscFunctionBegin;
1284   SETERRQ(1,1,"Matrix is symmetric. MatTranspose() should not be called");
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNC__
1289 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ"
1290 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1291 {
1292   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1293   Mat         a = baij->A,b = baij->B;
1294   int         ierr,s1,s2,s3;
1295 
1296   PetscFunctionBegin;
1297   if (ll != rr) {
1298     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n");
1299   }
1300   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1301   if (rr) {
1302     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1303     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1304     /* Overlap communication with computation. */
1305     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1306     /*} if (ll) { */
1307     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1308     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1309     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1310     /* } */
1311   /* scale  the diagonal block */
1312   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1313 
1314   /* if (rr) { */
1315     /* Do a scatter end and then right scale the off-diagonal block */
1316     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1317     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1318   }
1319 
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNC__
1324 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ"
1325 int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag)
1326 {
1327   Mat_MPISBAIJ    *l = (Mat_MPISBAIJ*)A->data;
1328   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1329   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1330   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1331   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1332   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1333   MPI_Comm       comm = A->comm;
1334   MPI_Request    *send_waits,*recv_waits;
1335   MPI_Status     recv_status,*send_status;
1336   IS             istmp;
1337 
1338   PetscFunctionBegin;
1339   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1340   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1341 
1342   /*  first count number of contributors to each processor */
1343   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
1344   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1345   procs  = nprocs + size;
1346   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1347   for (i=0; i<N; i++) {
1348     idx   = rows[i];
1349     found = 0;
1350     for (j=0; j<size; j++) {
1351       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1352         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1353       }
1354     }
1355     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1356   }
1357   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1358 
1359   /* inform other processors of number of messages and max length*/
1360   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
1361   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1362   nmax   = work[rank];
1363   nrecvs = work[size+rank];
1364   ierr = PetscFree(work);CHKERRQ(ierr);
1365 
1366   /* post receives:   */
1367   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1368   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1369   for (i=0; i<nrecvs; i++) {
1370     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1371   }
1372 
1373   /* do sends:
1374      1) starts[i] gives the starting index in svalues for stuff going to
1375      the ith processor
1376   */
1377   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
1378   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1379   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
1380   starts[0]  = 0;
1381   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1382   for (i=0; i<N; i++) {
1383     svalues[starts[owner[i]]++] = rows[i];
1384   }
1385   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1386 
1387   starts[0] = 0;
1388   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1389   count = 0;
1390   for (i=0; i<size; i++) {
1391     if (procs[i]) {
1392       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1393     }
1394   }
1395   ierr = PetscFree(starts);CHKERRQ(ierr);
1396 
1397   base = owners[rank]*bs;
1398 
1399   /*  wait on receives */
1400   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
1401   source = lens + nrecvs;
1402   count  = nrecvs; slen = 0;
1403   while (count) {
1404     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1405     /* unpack receives into our local space */
1406     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1407     source[imdex]  = recv_status.MPI_SOURCE;
1408     lens[imdex]    = n;
1409     slen          += n;
1410     count--;
1411   }
1412   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1413 
1414   /* move the data into the send scatter */
1415   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
1416   count = 0;
1417   for (i=0; i<nrecvs; i++) {
1418     values = rvalues + i*nmax;
1419     for (j=0; j<lens[i]; j++) {
1420       lrows[count++] = values[j] - base;
1421     }
1422   }
1423   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1424   ierr = PetscFree(lens);CHKERRQ(ierr);
1425   ierr = PetscFree(owner);CHKERRQ(ierr);
1426   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1427 
1428   /* actually zap the local rows */
1429   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1430   PLogObjectParent(A,istmp);
1431 
1432   /*
1433         Zero the required rows. If the "diagonal block" of the matrix
1434      is square and the user wishes to set the diagonal we use seperate
1435      code so that MatSetValues() is not called for each diagonal allocating
1436      new memory, thus calling lots of mallocs and slowing things down.
1437 
1438        Contributed by: Mathew Knepley
1439   */
1440   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1441   ierr = MatZeroRows_SeqSBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1442   if (diag && (l->A->M == l->A->N)) {
1443     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1444   } else if (diag) {
1445     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1446     if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
1447       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1448 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1449     }
1450     for (i=0; i<slen; i++) {
1451       row  = lrows[i] + rstart_bs;
1452       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1453     }
1454     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1455     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1456   } else {
1457     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1458   }
1459 
1460   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1461   ierr = PetscFree(lrows);CHKERRQ(ierr);
1462 
1463   /* wait on sends */
1464   if (nsends) {
1465     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1466     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1467     ierr        = PetscFree(send_status);CHKERRQ(ierr);
1468   }
1469   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1470   ierr = PetscFree(svalues);CHKERRQ(ierr);
1471 
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNC__
1476 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ"
1477 int MatPrintHelp_MPISBAIJ(Mat A)
1478 {
1479   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1480   MPI_Comm    comm = A->comm;
1481   static int  called = 0;
1482   int         ierr;
1483 
1484   PetscFunctionBegin;
1485   if (!a->rank) {
1486     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1487   }
1488   if (called) {PetscFunctionReturn(0);} else called = 1;
1489   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1490   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNC__
1495 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ"
1496 int MatSetUnfactored_MPISBAIJ(Mat A)
1497 {
1498   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1499   int         ierr;
1500 
1501   PetscFunctionBegin;
1502   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1507 
1508 #undef __FUNC__
1509 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ"
1510 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1511 {
1512   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1513   Mat         a,b,c,d;
1514   PetscTruth  flg;
1515   int         ierr;
1516 
1517   PetscFunctionBegin;
1518   if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1519   a = matA->A; b = matA->B;
1520   c = matB->A; d = matB->B;
1521 
1522   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1523   if (flg == PETSC_TRUE) {
1524     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1525   }
1526   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 /* -------------------------------------------------------------------*/
1531 static struct _MatOps MatOps_Values = {
1532   MatSetValues_MPISBAIJ,
1533   MatGetRow_MPISBAIJ,
1534   MatRestoreRow_MPISBAIJ,
1535   MatMult_MPISBAIJ,
1536   MatMultAdd_MPISBAIJ,
1537   MatMultTranspose_MPISBAIJ,
1538   MatMultTransposeAdd_MPISBAIJ,
1539   0,
1540   0,
1541   0,
1542   0,
1543   0,
1544   0,
1545   0,
1546   MatTranspose_MPISBAIJ,
1547   MatGetInfo_MPISBAIJ,
1548   MatEqual_MPISBAIJ,
1549   MatGetDiagonal_MPISBAIJ,
1550   MatDiagonalScale_MPISBAIJ,
1551   MatNorm_MPISBAIJ,
1552   MatAssemblyBegin_MPISBAIJ,
1553   MatAssemblyEnd_MPISBAIJ,
1554   0,
1555   MatSetOption_MPISBAIJ,
1556   MatZeroEntries_MPISBAIJ,
1557   MatZeroRows_MPISBAIJ,
1558   0,
1559   0,
1560   0,
1561   0,
1562   MatGetSize_MPISBAIJ,
1563   MatGetLocalSize_MPISBAIJ,
1564   MatGetOwnershipRange_MPISBAIJ,
1565   0,
1566   0,
1567   0,
1568   0,
1569   MatDuplicate_MPISBAIJ,
1570   0,
1571   0,
1572   0,
1573   0,
1574   0,
1575   MatGetSubMatrices_MPISBAIJ,
1576   MatIncreaseOverlap_MPISBAIJ,
1577   MatGetValues_MPISBAIJ,
1578   0,
1579   MatPrintHelp_MPISBAIJ,
1580   MatScale_MPISBAIJ,
1581   0,
1582   0,
1583   0,
1584   MatGetBlockSize_MPISBAIJ,
1585   0,
1586   0,
1587   0,
1588   0,
1589   0,
1590   0,
1591   MatSetUnfactored_MPISBAIJ,
1592   0,
1593   MatSetValuesBlocked_MPISBAIJ,
1594   0,
1595   0,
1596   0,
1597   MatGetMaps_Petsc};
1598 
1599 
1600 EXTERN_C_BEGIN
1601 #undef __FUNC__
1602 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ"
1603 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1604 {
1605   PetscFunctionBegin;
1606   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1607   *iscopy = PETSC_FALSE;
1608   PetscFunctionReturn(0);
1609 }
1610 EXTERN_C_END
1611 
1612 #undef __FUNC__
1613 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ"
1614 /*@C
1615    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1616    (block compressed row).  For good matrix assembly performance
1617    the user should preallocate the matrix storage by setting the parameters
1618    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1619    performance can be increased by more than a factor of 50.
1620 
1621    Collective on MPI_Comm
1622 
1623    Input Parameters:
1624 +  comm - MPI communicator
1625 .  bs   - size of blockk
1626 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1627            This value should be the same as the local size used in creating the
1628            y vector for the matrix-vector product y = Ax.
1629 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1630            This value should be the same as the local size used in creating the
1631            x vector for the matrix-vector product y = Ax.
1632 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1633 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1634 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1635            submatrix  (same for all local rows)
1636 .  d_nnz - array containing the number of block nonzeros in the various block rows
1637            of the in diagonal portion of the local (possibly different for each block
1638            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1639 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1640            submatrix (same for all local rows).
1641 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1642            off-diagonal portion of the local submatrix (possibly different for
1643            each block row) or PETSC_NULL.
1644 
1645    Output Parameter:
1646 .  A - the matrix
1647 
1648    Options Database Keys:
1649 .   -mat_no_unroll - uses code that does not unroll the loops in the
1650                      block calculations (much slower)
1651 .   -mat_block_size - size of the blocks to use
1652 .   -mat_mpi - use the parallel matrix data structures even on one processor
1653                (defaults to using SeqBAIJ format on one processor)
1654 
1655    Notes:
1656    The user MUST specify either the local or global matrix dimensions
1657    (possibly both).
1658 
1659    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1660    than it must be used on all processors that share the object for that argument.
1661 
1662    Storage Information:
1663    For a square global matrix we define each processor's diagonal portion
1664    to be its local rows and the corresponding columns (a square submatrix);
1665    each processor's off-diagonal portion encompasses the remainder of the
1666    local matrix (a rectangular submatrix).
1667 
1668    The user can specify preallocated storage for the diagonal part of
1669    the local submatrix with either d_nz or d_nnz (not both).  Set
1670    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1671    memory allocation.  Likewise, specify preallocated storage for the
1672    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1673 
1674    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1675    the figure below we depict these three local rows and all columns (0-11).
1676 
1677 .vb
1678            0 1 2 3 4 5 6 7 8 9 10 11
1679           -------------------
1680    row 3  |  o o o d d d o o o o o o
1681    row 4  |  o o o d d d o o o o o o
1682    row 5  |  o o o d d d o o o o o o
1683           -------------------
1684 .ve
1685 
1686    Thus, any entries in the d locations are stored in the d (diagonal)
1687    submatrix, and any entries in the o locations are stored in the
1688    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1689    stored simply in the MATSEQBAIJ format for compressed row storage.
1690 
1691    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1692    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1693    In general, for PDE problems in which most nonzeros are near the diagonal,
1694    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1695    or you will get TERRIBLE performance; see the users' manual chapter on
1696    matrices.
1697 
1698    Level: intermediate
1699 
1700 .keywords: matrix, block, aij, compressed row, sparse, parallel
1701 
1702 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1703 @*/
1704 
1705 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1706 {
1707   Mat          B;
1708   Mat_MPISBAIJ  *b;
1709   int          ierr,i,sum[1],work[1],mbs,Mbs=PETSC_DECIDE,size;
1710   PetscTruth   flag1,flag2,flg;
1711 
1712   PetscFunctionBegin;
1713   if (M != N || m != n){ /* N and n are not used after this */
1714     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, set M=N and m=n");
1715   }
1716   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1717 
1718   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
1719   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
1720   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
1721   if (d_nnz) {
1722     for (i=0; i<m/bs; i++) {
1723       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1724     }
1725   }
1726   if (o_nnz) {
1727     for (i=0; i<m/bs; i++) {
1728       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1729     }
1730   }
1731 
1732   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1733   ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr);
1734   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1735   if (!flag1 && !flag2 && size == 1) {
1736     if (M == PETSC_DECIDE) M = m;
1737     ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr);
1738     PetscFunctionReturn(0);
1739   }
1740 
1741   *A = 0;
1742   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView);
1743   PLogObjectCreate(B);
1744   B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b);
1745   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1746   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1747 
1748   B->ops->destroy    = MatDestroy_MPISBAIJ;
1749   B->ops->view       = MatView_MPISBAIJ;
1750   B->mapping    = 0;
1751   B->factor     = 0;
1752   B->assembled  = PETSC_FALSE;
1753 
1754   B->insertmode = NOT_SET_VALUES;
1755   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1756   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
1757 
1758   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
1759     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1760   }
1761   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
1762     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
1763   }
1764   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1765 
1766   if (M == PETSC_DECIDE) {
1767     work[0] = m; mbs = m/bs;
1768     ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1769     M = sum[0]; Mbs = M/bs;
1770   } else { /* M is specified */
1771     Mbs = M/bs;
1772     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
1773     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
1774     m   = mbs*bs;
1775   }
1776 
1777   if (mbs*bs != m) {
1778     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows/cols must be divisible by blocksize");
1779   }
1780 
1781   b->m = m; B->m = m;
1782   b->n = m; B->n = m;
1783   b->N = M; B->N = M;
1784   b->M = M;
1785   B->M = M;
1786   b->bs  = bs;
1787   b->bs2 = bs*bs;
1788   b->mbs = mbs;
1789   b->nbs = mbs;
1790   b->Mbs = Mbs;
1791   b->Nbs = Mbs;
1792 
1793   /* the information in the maps duplicates the information computed below, eventually
1794      we should remove the duplicate information that is not contained in the maps */
1795   ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr);
1796   ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr);
1797 
1798   /* build local table of row and column ownerships */
1799   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1800   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1801   b->cowners    = b->rowners + b->size + 2;
1802   b->rowners_bs = b->cowners + b->size + 2;
1803   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1804   b->rowners[0]    = 0;
1805   for (i=2; i<=b->size; i++) {
1806     b->rowners[i] += b->rowners[i-1];
1807   }
1808   for (i=0; i<=b->size; i++) {
1809     b->rowners_bs[i] = b->rowners[i]*bs;
1810   }
1811   b->rstart    = b->rowners[b->rank];
1812   b->rend      = b->rowners[b->rank+1];
1813   b->rstart_bs = b->rstart * bs;
1814   b->rend_bs   = b->rend * bs;
1815 
1816   b->cstart    = b->rstart;
1817   b->cend      = b->rend;
1818   b->cstart_bs = b->cstart * bs;
1819   b->cend_bs   = b->cend * bs;
1820 
1821 
1822   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1823   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1824   PLogObjectParent(B,b->A);
1825   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1826   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1827   PLogObjectParent(B,b->B);
1828 
1829   /* build cache for off array entries formed */
1830   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1831   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1832   b->donotstash  = PETSC_FALSE;
1833   b->colmap      = PETSC_NULL;
1834   b->garray      = PETSC_NULL;
1835   b->roworiented = PETSC_TRUE;
1836 
1837 #if defined(PEYSC_USE_MAT_SINGLE)
1838   /* stuff for MatSetValues_XXX in single precision */
1839   b->lensetvalues     = 0;
1840   b->setvaluescopy    = PETSC_NULL;
1841 #endif
1842 
1843   /* stuff used in block assembly */
1844   b->barray       = 0;
1845 
1846   /* stuff used for matrix vector multiply */
1847   b->lvec         = 0;
1848   b->Mvctx        = 0;
1849 
1850   /* stuff for MatGetRow() */
1851   b->rowindices   = 0;
1852   b->rowvalues    = 0;
1853   b->getrowactive = PETSC_FALSE;
1854 
1855   /* hash table stuff */
1856   b->ht           = 0;
1857   b->hd           = 0;
1858   b->ht_size      = 0;
1859   b->ht_flag      = PETSC_FALSE;
1860   b->ht_fact      = 0;
1861   b->ht_total_ct  = 0;
1862   b->ht_insert_ct = 0;
1863 
1864   *A = B;
1865   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1866   if (flg) {
1867     double fact = 1.39;
1868     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1869     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1870     if (fact <= 1.0) fact = 1.39;
1871     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1872     PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1873   }
1874   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1875                                      "MatStoreValues_MPISBAIJ",
1876                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1877   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1878                                      "MatRetrieveValues_MPISBAIJ",
1879                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1880   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1881                                      "MatGetDiagonalBlock_MPISBAIJ",
1882                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 
1887 #undef __FUNC__
1888 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ"
1889 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1890 {
1891   Mat         mat;
1892   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1893   int         ierr,len=0;
1894   PetscTruth  flg;
1895 
1896   PetscFunctionBegin;
1897   *newmat       = 0;
1898   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView);
1899   PLogObjectCreate(mat);
1900   mat->data         = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a);
1901   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1902   mat->ops->destroy = MatDestroy_MPISBAIJ;
1903   mat->ops->view    = MatView_MPISBAIJ;
1904   mat->factor       = matin->factor;
1905   mat->assembled    = PETSC_TRUE;
1906   mat->insertmode   = NOT_SET_VALUES;
1907 
1908   a->m = mat->m   = oldmat->m;
1909   a->n = mat->n   = oldmat->n;
1910   a->M = mat->M   = oldmat->M;
1911   a->N = mat->N   = oldmat->N;
1912 
1913   a->bs  = oldmat->bs;
1914   a->bs2 = oldmat->bs2;
1915   a->mbs = oldmat->mbs;
1916   a->nbs = oldmat->nbs;
1917   a->Mbs = oldmat->Mbs;
1918   a->Nbs = oldmat->Nbs;
1919 
1920   a->rstart       = oldmat->rstart;
1921   a->rend         = oldmat->rend;
1922   a->cstart       = oldmat->cstart;
1923   a->cend         = oldmat->cend;
1924   a->size         = oldmat->size;
1925   a->rank         = oldmat->rank;
1926   a->donotstash   = oldmat->donotstash;
1927   a->roworiented  = oldmat->roworiented;
1928   a->rowindices   = 0;
1929   a->rowvalues    = 0;
1930   a->getrowactive = PETSC_FALSE;
1931   a->barray       = 0;
1932   a->rstart_bs    = oldmat->rstart_bs;
1933   a->rend_bs      = oldmat->rend_bs;
1934   a->cstart_bs    = oldmat->cstart_bs;
1935   a->cend_bs      = oldmat->cend_bs;
1936 
1937   /* hash table stuff */
1938   a->ht           = 0;
1939   a->hd           = 0;
1940   a->ht_size      = 0;
1941   a->ht_flag      = oldmat->ht_flag;
1942   a->ht_fact      = oldmat->ht_fact;
1943   a->ht_total_ct  = 0;
1944   a->ht_insert_ct = 0;
1945 
1946 
1947   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1948   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1949   a->cowners    = a->rowners + a->size + 2;
1950   a->rowners_bs = a->cowners + a->size + 2;
1951   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1952   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1953   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
1954   if (oldmat->colmap) {
1955 #if defined (PETSC_USE_CTABLE)
1956   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1957 #else
1958     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
1959     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
1960     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
1961 #endif
1962   } else a->colmap = 0;
1963   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1964     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
1965     PLogObjectMemory(mat,len*sizeof(int));
1966     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
1967   } else a->garray = 0;
1968 
1969   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1970   PLogObjectParent(mat,a->lvec);
1971   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1972 
1973   PLogObjectParent(mat,a->Mvctx);
1974   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1975   PLogObjectParent(mat,a->A);
1976   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1977   PLogObjectParent(mat,a->B);
1978   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1979   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
1980   if (flg) {
1981     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1982   }
1983   *newmat = mat;
1984   PetscFunctionReturn(0);
1985 }
1986 
1987 #include "petscsys.h"
1988 
1989 #undef __FUNC__
1990 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ"
1991 int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat)
1992 {
1993   Mat          A;
1994   int          i,nz,ierr,j,rstart,rend,fd;
1995   Scalar       *vals,*buf;
1996   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1997   MPI_Status   status;
1998   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
1999   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2000   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2001   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2002   int          dcount,kmax,k,nzcount,tmp;
2003 
2004   PetscFunctionBegin;
2005   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2006 
2007   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2008   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2009   if (!rank) {
2010     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2011     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2012     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2013     if (header[3] < 0) {
2014       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPISBAIJ");
2015     }
2016   }
2017 
2018   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2019   M = header[1]; N = header[2];
2020 
2021   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2022 
2023   /*
2024      This code adds extra rows to make sure the number of rows is
2025      divisible by the blocksize
2026   */
2027   Mbs        = M/bs;
2028   extra_rows = bs - M + bs*(Mbs);
2029   if (extra_rows == bs) extra_rows = 0;
2030   else                  Mbs++;
2031   if (extra_rows &&!rank) {
2032     PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2033   }
2034 
2035   /* determine ownership of all rows */
2036   mbs = Mbs/size + ((Mbs % size) > rank);
2037   m   = mbs * bs;
2038   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2039   browners = rowners + size + 1;
2040   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2041   rowners[0] = 0;
2042   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2043   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2044   rstart = rowners[rank];
2045   rend   = rowners[rank+1];
2046 
2047   /* distribute row lengths to all processors */
2048   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
2049   if (!rank) {
2050     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2051     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2052     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2053     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2054     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2055     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2056     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2057   } else {
2058     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2059   }
2060 
2061   if (!rank) {   /* procs[0] */
2062     /* calculate the number of nonzeros on each processor */
2063     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2064     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2065     for (i=0; i<size; i++) {
2066       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2067         procsnz[i] += rowlengths[j];
2068       }
2069     }
2070     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2071 
2072     /* determine max buffer needed and allocate it */
2073     maxnz = 0;
2074     for (i=0; i<size; i++) {
2075       maxnz = PetscMax(maxnz,procsnz[i]);
2076     }
2077     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2078 
2079     /* read in my part of the matrix column indices  */
2080     nz = procsnz[0];
2081     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2082     mycols = ibuf;
2083     if (size == 1)  nz -= extra_rows;
2084     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2085     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2086 
2087     /* read in every ones (except the last) and ship off */
2088     for (i=1; i<size-1; i++) {
2089       nz   = procsnz[i];
2090       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2091       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2092     }
2093     /* read in the stuff for the last proc */
2094     if (size != 1) {
2095       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2096       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2097       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2098       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2099     }
2100     ierr = PetscFree(cols);CHKERRQ(ierr);
2101   } else {  /* procs[i], i>0 */
2102     /* determine buffer space needed for message */
2103     nz = 0;
2104     for (i=0; i<m; i++) {
2105       nz += locrowlens[i];
2106     }
2107     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2108     mycols = ibuf;
2109     /* receive message of column indices*/
2110     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2111     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2112     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2113   }
2114 
2115   /* loop over local rows, determining number of off diagonal entries */
2116   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2117   odlens = dlens + (rend-rstart);
2118   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2119   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2120   masked1 = mask    + Mbs;
2121   masked2 = masked1 + Mbs;
2122   rowcount = 0; nzcount = 0;
2123   for (i=0; i<mbs; i++) {
2124     dcount  = 0;
2125     odcount = 0;
2126     for (j=0; j<bs; j++) {
2127       kmax = locrowlens[rowcount];
2128       for (k=0; k<kmax; k++) {
2129         tmp = mycols[nzcount++]/bs; /* block col. index */
2130         if (!mask[tmp]) {
2131           mask[tmp] = 1;
2132           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2133           else masked1[dcount++] = tmp; /* entry in diag portion */
2134         }
2135       }
2136       rowcount++;
2137     }
2138 
2139     dlens[i]  = dcount;  /* d_nzz[i] */
2140     odlens[i] = odcount; /* o_nzz[i] */
2141 
2142     /* zero out the mask elements we set */
2143     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2144     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2145   }
2146 
2147   /* create our matrix */
2148   ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat);
2149   CHKERRQ(ierr);
2150   A = *newmat;
2151   MatSetOption(A,MAT_COLUMNS_SORTED);
2152 
2153   if (!rank) {
2154     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
2155     /* read in my part of the matrix numerical values  */
2156     nz = procsnz[0];
2157     vals = buf;
2158     mycols = ibuf;
2159     if (size == 1)  nz -= extra_rows;
2160     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2161     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2162 
2163     /* insert into matrix */
2164     jj      = rstart*bs;
2165     for (i=0; i<m; i++) {
2166       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2167       mycols += locrowlens[i];
2168       vals   += locrowlens[i];
2169       jj++;
2170     }
2171 
2172     /* read in other processors (except the last one) and ship out */
2173     for (i=1; i<size-1; i++) {
2174       nz   = procsnz[i];
2175       vals = buf;
2176       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2177       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2178     }
2179     /* the last proc */
2180     if (size != 1){
2181       nz   = procsnz[i] - extra_rows;
2182       vals = buf;
2183       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2184       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2185       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2186     }
2187     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2188 
2189   } else {
2190     /* receive numeric values */
2191     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
2192 
2193     /* receive message of values*/
2194     vals   = buf;
2195     mycols = ibuf;
2196     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2197     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2198     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2199 
2200     /* insert into matrix */
2201     jj      = rstart*bs;
2202     for (i=0; i<m; i++) {
2203       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2204       mycols += locrowlens[i];
2205       vals   += locrowlens[i];
2206       jj++;
2207     }
2208   }
2209 
2210   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2211   ierr = PetscFree(buf);CHKERRQ(ierr);
2212   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2213   ierr = PetscFree(rowners);CHKERRQ(ierr);
2214   ierr = PetscFree(dlens);CHKERRQ(ierr);
2215   ierr = PetscFree(mask);CHKERRQ(ierr);
2216   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2217   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #undef __FUNC__
2222 #define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor"
2223 /*@
2224    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2225 
2226    Input Parameters:
2227 .  mat  - the matrix
2228 .  fact - factor
2229 
2230    Collective on Mat
2231 
2232    Level: advanced
2233 
2234   Notes:
2235    This can also be set by the command line option: -mat_use_hash_table fact
2236 
2237 .keywords: matrix, hashtable, factor, HT
2238 
2239 .seealso: MatSetOption()
2240 @*/
2241 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2242 {
2243   Mat_MPIBAIJ *baij;
2244 
2245   PetscFunctionBegin;
2246   SETERRQ(1,1,"Function not yet written for SBAIJ format");
2247   PetscFunctionReturn(0);
2248 }
2249