xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 63bb2aa60f4717cbfb86ebed81540aa885ea575c)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 extern int DisAssemble_MPISBAIJ(Mat);
11 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
12 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
13 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *);
14 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
15 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
18 extern int MatPrintHelp_SeqSBAIJ(Mat);
19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
28    converts the entries into single precision and then calls ..._MatScalar() to put them
29    into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
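/* A rough sketch of the single-precision path (see MatSetValues_MPISBAIJ()
   below): the PetscScalar values supplied by the caller are copied into a
   MatScalar work array and only then handed to the ..._MatScalar() routine,
   e.g.
       for (i=0; i<N; i++) vsingle[i] = v[i];
       ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
*/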
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 int MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
51   int          ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 int MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
66   int          ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 /*
76      Local utility routine that creates a mapping from the global column
77    number to the local number in the off-diagonal part of the local
78    storage of the matrix.  This is done in a non-scalable way since the
79    length of colmap equals the global matrix length.
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private"
83 static int CreateColmap_MPISBAIJ_Private(Mat mat)
84 {
85   PetscFunctionBegin;
86   SETERRQ(1,"Function not yet written for SBAIJ format");
87   /* PetscFunctionReturn(0); */
88 }
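/* A sketch of what colmap is meant to provide once CreateColmap_MPISBAIJ_Private()
   is written: for a global block column present in B, colmap[] holds 1 plus the
   first local column of that block in B (0 meaning the block column is not
   present locally), so that lookups such as
       col = baij->colmap[in[j]/bs] - 1;
   in MatSetValues_MPISBAIJ_MatScalar() recover the local index (the within-block
   offset in[j]%bs is then added). */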
89 
90 #define CHUNKSIZE  10
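/* CHUNKSIZE extra block entries are allocated whenever a block row of A or B
   overflows its preallocation in the macros below: a single new malloc of
       new_nz*(sizeof(int)+bs2*sizeof(MatScalar)) + (mbs+1)*sizeof(int)
   bytes holds the enlarged a, j and i arrays, the old data is copied across
   and the old storage is freed. */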
91 
92 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
93 { \
94  \
95     brow = row/bs;  \
96     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
97     rmax = aimax[brow]; nrow = ailen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto a_noinsert; \
113         } \
114       } \
115       if (a->nonew == 1) goto a_noinsert; \
116       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
117       if (nrow >= rmax) { \
118         /* there is no extra room in row, therefore enlarge */ \
119         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
120         MatScalar *new_a; \
121  \
122         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
123  \
124         /* malloc new storage space */ \
125         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
126         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
127         new_j = (int*)(new_a + bs2*new_nz); \
128         new_i = new_j + new_nz; \
129  \
130         /* copy over old data into new slots */ \
131         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
132         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
133         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
134         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
135         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
136         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
137         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
138         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
139                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
140         /* free up old matrix storage */ \
141         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
142         if (!a->singlemalloc) { \
143           ierr = PetscFree(a->i);CHKERRQ(ierr); \
144           ierr = PetscFree(a->j);CHKERRQ(ierr);\
145         } \
146         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
147         a->singlemalloc = PETSC_TRUE; \
148  \
149         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
150         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
151         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
152         a->s_maxnz += bs2*CHUNKSIZE; \
153         a->reallocs++; \
154         a->s_nz++; \
155       } \
156       N = nrow++ - 1;  \
157       /* shift up all the later entries in this row */ \
158       for (ii=N; ii>=_i; ii--) { \
159         rp[ii+1] = rp[ii]; \
160         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
161       } \
162       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
163       rp[_i]                      = bcol;  \
164       ap[bs2*_i + bs*cidx + ridx] = value;  \
165       a_noinsert:; \
166     ailen[brow] = nrow; \
167 }
168 #ifndef MatSetValues_SeqSBAIJ_B_Private
169 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
170 { \
171     brow = row/bs;  \
172     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
173     rmax = bimax[brow]; nrow = bilen[brow]; \
174       bcol = col/bs; \
175       ridx = row % bs; cidx = col % bs; \
176       low = 0; high = nrow; \
177       while (high-low > 3) { \
178         t = (low+high)/2; \
179         if (rp[t] > bcol) high = t; \
180         else              low  = t; \
181       } \
182       for (_i=low; _i<high; _i++) { \
183         if (rp[_i] > bcol) break; \
184         if (rp[_i] == bcol) { \
185           bap  = ap +  bs2*_i + bs*cidx + ridx; \
186           if (addv == ADD_VALUES) *bap += value;  \
187           else                    *bap  = value;  \
188           goto b_noinsert; \
189         } \
190       } \
191       if (b->nonew == 1) goto b_noinsert; \
192       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
193       if (nrow >= rmax) { \
194         /* there is no extra room in row, therefore enlarge */ \
195         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
196         MatScalar *new_a; \
197  \
198         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
199  \
200         /* malloc new storage space */ \
201         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
202         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
203         new_j = (int*)(new_a + bs2*new_nz); \
204         new_i = new_j + new_nz; \
205  \
206         /* copy over old data into new slots */ \
207         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
208         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
209         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
210         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
211         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
212         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
213         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
214         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
215                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
216         /* free up old matrix storage */ \
217         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
218         if (!b->singlemalloc) { \
219           ierr = PetscFree(b->i);CHKERRQ(ierr); \
220           ierr = PetscFree(b->j);CHKERRQ(ierr); \
221         } \
222         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
223         b->singlemalloc = PETSC_TRUE; \
224  \
225         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
226         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
227         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
228         b->maxnz += bs2*CHUNKSIZE; \
229         b->reallocs++; \
230         b->nz++; \
231       } \
232       N = nrow++ - 1;  \
233       /* shift up all the later entries in this row */ \
234       for (ii=N; ii>=_i; ii--) { \
235         rp[ii+1] = rp[ii]; \
236         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
237       } \
238       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
239       rp[_i]                      = bcol;  \
240       ap[bs2*_i + bs*cidx + ridx] = value;  \
241       b_noinsert:; \
242     bilen[brow] = nrow; \
243 }
244 #endif
245 
246 #if defined(PETSC_USE_MAT_SINGLE)
247 #undef __FUNCT__
248 #define __FUNCT__ "MatSetValues_MPISBAIJ"
249 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
250 {
251   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
252   int          ierr,i,N = m*n;
253   MatScalar    *vsingle;
254 
255   PetscFunctionBegin;
256   if (N > b->setvalueslen) {
257     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
258     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
259     b->setvalueslen  = N;
260   }
261   vsingle = b->setvaluescopy;
262 
263   for (i=0; i<N; i++) {
264     vsingle[i] = v[i];
265   }
266   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
272 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
273 {
274   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
275   int         ierr,i,N = m*n*b->bs2;
276   MatScalar   *vsingle;
277 
278   PetscFunctionBegin;
279   if (N > b->setvalueslen) {
280     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
281     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
282     b->setvalueslen  = N;
283   }
284   vsingle = b->setvaluescopy;
285   for (i=0; i<N; i++) {
286     vsingle[i] = v[i];
287   }
288   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
294 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
295 {
296   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
297   int         ierr,i,N = m*n;
298   MatScalar   *vsingle;
299 
300   PetscFunctionBegin;
301   SETERRQ(1,"Function not yet written for SBAIJ format");
302   /* PetscFunctionReturn(0); */
303 }
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
307 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
308 {
309   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
310   int         ierr,i,N = m*n*b->bs2;
311   MatScalar   *vsingle;
312 
313   PetscFunctionBegin;
314   SETERRQ(1,"Function not yet written for SBAIJ format");
315   /* PetscFunctionReturn(0); */
316 }
317 #endif
318 
319 /* Only add/insert a(i,j) with i<=j (blocks).
320    Any a(i,j) with i>j input by the user is ignored.
321 */
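/* For example (a sketch): when inserting the symmetric 2x2 block matrix
       [ a00 a01 ]
       [ a10 a11 ]
   only the blocks with block row <= block column, i.e. a00, a01 and a11,
   are kept; a call that supplies block a10 is silently dropped on this path. */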
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
324 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
325 {
326   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
327   MatScalar    value;
328   PetscTruth   roworiented = baij->roworiented;
329   int          ierr,i,j,row,col;
330   int          rstart_orig=baij->rstart_bs;
331   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
332   int          cend_orig=baij->cend_bs,bs=baij->bs;
333 
334   /* Some Variables required in the macro */
335   Mat          A = baij->A;
336   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
337   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
338   MatScalar    *aa=a->a;
339 
340   Mat          B = baij->B;
341   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
342   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
343   MatScalar    *ba=b->a;
344 
345   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
346   int          low,high,t,ridx,cidx,bs2=a->bs2;
347   MatScalar    *ap,*bap;
348 
349   /* for stash */
350   int          n_loc, *in_loc=0;
351   MatScalar    *v_loc=0;
352 
353   PetscFunctionBegin;
354 
355   if(!baij->donotstash){
356     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
357     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
358   }
359 
360   for (i=0; i<m; i++) {
361     if (im[i] < 0) continue;
362 #if defined(PETSC_USE_BOPT_g)
363     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
364 #endif
365     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
366       row = im[i] - rstart_orig;              /* local row index */
367       for (j=0; j<n; j++) {
368         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
369         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
370           col = in[j] - cstart_orig;          /* local col index */
371           brow = row/bs; bcol = col/bs;
372           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
373           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
374           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
375           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
376         } else if (in[j] < 0) continue;
377 #if defined(PETSC_USE_BOPT_g)
378         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");}
379 #endif
380         else {  /* off-diag entry (B) */
381           if (mat->was_assembled) {
382             if (!baij->colmap) {
383               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
384             }
385 #if defined (PETSC_USE_CTABLE)
386             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
387             col  = col - 1;
388 #else
389             col = baij->colmap[in[j]/bs] - 1;
390 #endif
391             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
392               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
393               col =  in[j];
394               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
395               B = baij->B;
396               b = (Mat_SeqBAIJ*)(B)->data;
397               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
398               ba=b->a;
399             } else col += in[j]%bs;
400           } else col = in[j];
401           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
402           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
403           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
404         }
405       }
406     } else {  /* off processor entry */
407       if (!baij->donotstash) {
408         n_loc = 0;
409         for (j=0; j<n; j++){
410           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
411           in_loc[n_loc] = in[j];
412           if (roworiented) {
413             v_loc[n_loc] = v[i*n+j];
414           } else {
415             v_loc[n_loc] = v[j*m+i];
416           }
417           n_loc++;
418         }
419         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
420       }
421     }
422   }
423 
424   if(!baij->donotstash){
425     ierr = PetscFree(in_loc);CHKERRQ(ierr);
426     ierr = PetscFree(v_loc);CHKERRQ(ierr);
427   }
428   PetscFunctionReturn(0);
429 }
430 
431 #undef __FUNCT__
432 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
433 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
434 {
435   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
436   MatScalar    *value,*barray=baij->barray;
437   PetscTruth   roworiented = baij->roworiented;
438   int          ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
439   int          rend=baij->rend,cstart=baij->cstart,stepval;
440   int          cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
441 
442   PetscFunctionBegin;
443   if(!barray) {
444     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
445     baij->barray = barray;
446   }
447 
448   if (roworiented) {
449     stepval = (n-1)*bs;
450   } else {
451     stepval = (m-1)*bs;
452   }
453   for (i=0; i<m; i++) {
454     if (im[i] < 0) continue;
455 #if defined(PETSC_USE_BOPT_g)
456     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
457 #endif
458     if (im[i] >= rstart && im[i] < rend) {
459       row = im[i] - rstart;
460       for (j=0; j<n; j++) {
461         /* If NumCol = 1 then a copy is not required */
462         if ((roworiented) && (n == 1)) {
463           barray = v + i*bs2;
464         } else if((!roworiented) && (m == 1)) {
465           barray = v + j*bs2;
466         } else { /* Here a copy is required */
467           if (roworiented) {
468             value = v + i*(stepval+bs)*bs + j*bs;
469           } else {
470             value = v + j*(stepval+bs)*bs + i*bs;
471           }
472           for (ii=0; ii<bs; ii++,value+=stepval) {
473             for (jj=0; jj<bs; jj++) {
474               *barray++  = *value++;
475             }
476           }
477           barray -=bs2;
478         }
479 
480         if (in[j] >= cstart && in[j] < cend){
481           col  = in[j] - cstart;
482           ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
483         }
484         else if (in[j] < 0) continue;
485 #if defined(PETSC_USE_BOPT_g)
486         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
487 #endif
488         else {
489           if (mat->was_assembled) {
490             if (!baij->colmap) {
491               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
492             }
493 
494 #if defined(PETSC_USE_BOPT_g)
495 #if defined (PETSC_USE_CTABLE)
496             { int data;
497               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
498               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
499             }
500 #else
501             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
502 #endif
503 #endif
504 #if defined (PETSC_USE_CTABLE)
505 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
506             col  = (col - 1)/bs;
507 #else
508             col = (baij->colmap[in[j]] - 1)/bs;
509 #endif
510             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
511               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
512               col =  in[j];
513             }
514           }
515           else col = in[j];
516           ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
517         }
518       }
519     } else {
520       if (!baij->donotstash) {
521         if (roworiented) {
522           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
523         } else {
524           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
525         }
526       }
527     }
528   }
529   PetscFunctionReturn(0);
530 }
531 
532 #define HASH_KEY 0.6180339887
533 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
534 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
535 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
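/* Multiplicative hashing with the golden-ratio constant: HASH(size,key,tmp)
   evaluates to (int)(size*frac(key*0.6180339887)).  A worked example (just
   arithmetic, not code in this file): for size=11 and key=7,
   tmp = 7*0.6180339887 = 4.3262..., the fractional part is 0.3262..., so the
   slot is (int)(11*0.3262...) = 3. */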
536 #undef __FUNCT__
537 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
538 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
539 {
540   PetscFunctionBegin;
541   SETERRQ(1,"Function not yet written for SBAIJ format");
542   /* PetscFunctionReturn(0); */
543 }
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
547 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
548 {
549   PetscFunctionBegin;
550   SETERRQ(1,"Function not yet written for SBAIJ format");
551   /* PetscFunctionReturn(0); */
552 }
553 
554 #undef __FUNCT__
555 #define __FUNCT__ "MatGetValues_MPISBAIJ"
556 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
557 {
558   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
559   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
560   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
561 
562   PetscFunctionBegin;
563   for (i=0; i<m; i++) {
564     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
565     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
566     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
567       row = idxm[i] - bsrstart;
568       for (j=0; j<n; j++) {
569         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
570         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
571         if (idxn[j] >= bscstart && idxn[j] < bscend){
572           col = idxn[j] - bscstart;
573           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
574         } else {
575           if (!baij->colmap) {
576             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
577           }
578 #if defined (PETSC_USE_CTABLE)
579           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
580           data --;
581 #else
582           data = baij->colmap[idxn[j]/bs]-1;
583 #endif
584           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
585           else {
586             col  = data + idxn[j]%bs;
587             ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
588           }
589         }
590       }
591     } else {
592       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
593     }
594   }
595  PetscFunctionReturn(0);
596 }
597 
598 #undef __FUNCT__
599 #define __FUNCT__ "MatNorm_MPISBAIJ"
600 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
601 {
602   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
603   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
604   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
605   int        ierr;
606   PetscReal  sum[2],*lnorm2;
607 
608   PetscFunctionBegin;
609   if (baij->size == 1) {
610     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
611   } else {
612     if (type == NORM_FROBENIUS) {
613       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
614       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
615       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
616       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
617       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
618       /*
619       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
620       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
621       */
622       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
623       /*
624       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
625       PetscSynchronizedFlush(PETSC_COMM_WORLD); */
626 
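      /* B stores each off-diagonal block of the symmetric matrix only once,
         so its squared Frobenius norm is counted twice */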
627       *norm = sqrt(sum[0] + 2*sum[1]);
628       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
629     } else {
630       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
631     }
632   }
633   PetscFunctionReturn(0);
634 }
635 
636 /*
637   Creates the hash table, and sets the table
638   This table is created only once.
639   If new entries need to be added to the matrix
640   then the hash table has to be destroyed and
641   recreated.
642 */
643 #undef __FUNCT__
644 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
645 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
646 {
647   PetscFunctionBegin;
648   SETERRQ(1,"Function not yet written for SBAIJ format");
649   /* PetscFunctionReturn(0); */
650 }
651 
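/*
   Assembly outline (an informal sketch of the two routines below): entries
   destined for other processes were placed in mat->stash (point entries) and
   mat->bstash (blocked entries) by MatSetValues()/MatSetValuesBlocked().
   MatAssemblyBegin() starts scattering those stashes to their owners;
   MatAssemblyEnd() receives them, re-inserts them locally, and then
   assembles the A (diagonal) and B (off-diagonal) blocks.
*/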
652 #undef __FUNCT__
653 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
654 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
655 {
656   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
657   int         ierr,nstash,reallocs;
658   InsertMode  addv;
659 
660   PetscFunctionBegin;
661   if (baij->donotstash) {
662     PetscFunctionReturn(0);
663   }
664 
665   /* make sure all processors are either in INSERTMODE or ADDMODE */
666   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
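  /* a bitwise OR of the insert modes across processes lets us detect the
     illegal case below where some processes inserted while others added */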
667   if (addv == (ADD_VALUES|INSERT_VALUES)) {
668     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
669   }
670   mat->insertmode = addv; /* in case this processor had no cache */
671 
672   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
673   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
674   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
675   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
676   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
677   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
678   PetscFunctionReturn(0);
679 }
680 
681 #undef __FUNCT__
682 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
683 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
684 {
685   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
686   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
687   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
688   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
689   int         *row,*col,other_disassembled;
690   PetscTruth  r1,r2,r3;
691   MatScalar   *val;
692   InsertMode  addv = mat->insertmode;
693 #if defined(PETSC_HAVE_SPOOLES)
694   PetscTruth  flag;
695 #endif
696 
697   PetscFunctionBegin;
698 
699   if (!baij->donotstash) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       /*
703       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
704       PetscSynchronizedFlush(PETSC_COMM_WORLD);
705       */
706       if (!flg) break;
707 
708       for (i=0; i<n;) {
709         /* Now identify the consecutive vals belonging to the same row */
710         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
711         if (j < n) ncols = j-i;
712         else       ncols = n-i;
713         /* Now assemble all these values with a single function call */
714         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
715         i = j;
716       }
717     }
718     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
719     /* Now process the block-stash. Since the values are stashed column-oriented,
720        set the roworiented flag to column oriented, and after MatSetValues()
721        restore the original flags */
722     r1 = baij->roworiented;
723     r2 = a->roworiented;
724     r3 = b->roworiented;
725     baij->roworiented = PETSC_FALSE;
726     a->roworiented    = PETSC_FALSE;
727     b->roworiented    = PETSC_FALSE;
728     while (1) {
729       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
730       if (!flg) break;
731 
732       for (i=0; i<n;) {
733         /* Now identify the consecutive vals belonging to the same row */
734         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
735         if (j < n) ncols = j-i;
736         else       ncols = n-i;
737         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
738         i = j;
739       }
740     }
741     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
742     baij->roworiented = r1;
743     a->roworiented    = r2;
744     b->roworiented    = r3;
745   }
746 
747   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
748   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
749 
750   /* determine if any processor has disassembled, if so we must
751      also disassemble ourselves, in order that we may reassemble. */
752   /*
753      if nonzero structure of submatrix B cannot change then we know that
754      no processor disassembled thus we can skip this stuff
755   */
756   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
757     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
758     if (mat->was_assembled && !other_disassembled) {
759       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
760     }
761   }
762 
763   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
764     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
765   }
766   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
767   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
768 
769 #if defined(PETSC_USE_BOPT_g)
770   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
771     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
772     baij->ht_total_ct  = 0;
773     baij->ht_insert_ct = 0;
774   }
775 #endif
776   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
777     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
778     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
779     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
780   }
781 
782   if (baij->rowvalues) {
783     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
784     baij->rowvalues = 0;
785   }
786 
787 #if defined(PETSC_HAVE_SPOOLES)
788   ierr = PetscOptionsHasName(mat->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
789   if (flag) { ierr = MatUseSpooles_MPISBAIJ(mat);CHKERRQ(ierr); }
790 #endif
791   PetscFunctionReturn(0);
792 }
793 
794 #undef __FUNCT__
795 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
796 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
797 {
798   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
799   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
800   PetscTruth        isascii,isdraw;
801   PetscViewer       sviewer;
802   PetscViewerFormat format;
803 
804   PetscFunctionBegin;
805   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
806   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
807   if (isascii) {
808     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
809     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
810       MatInfo info;
811       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
812       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
813       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
814               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
815               baij->bs,(int)info.memory);CHKERRQ(ierr);
816       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
817       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
818       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
819       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
820       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
821       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
822       PetscFunctionReturn(0);
823     } else if (format == PETSC_VIEWER_ASCII_INFO) {
824       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
825       PetscFunctionReturn(0);
826     }
827   }
828 
829   if (isdraw) {
830     PetscDraw       draw;
831     PetscTruth isnull;
832     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
833     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
834   }
835 
836   if (size == 1) {
837     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
838     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
839   } else {
840     /* assemble the entire matrix onto first processor. */
841     Mat         A;
842     Mat_SeqSBAIJ *Aloc;
843     Mat_SeqBAIJ *Bloc;
844     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
845     MatScalar   *a;
846 
847     if (!rank) {
848       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
849     } else {
850       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
851     }
852     PetscLogObjectParent(mat,A);
853 
854     /* copy over the A part */
855     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
856     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
857     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
858 
859     for (i=0; i<mbs; i++) {
860       rvals[0] = bs*(baij->rstart + i);
861       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
862       for (j=ai[i]; j<ai[i+1]; j++) {
863         col = (baij->cstart+aj[j])*bs;
864         for (k=0; k<bs; k++) {
865           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
866           col++; a += bs;
867         }
868       }
869     }
870     /* copy over the B part */
871     Bloc = (Mat_SeqBAIJ*)baij->B->data;
872     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
873     for (i=0; i<mbs; i++) {
874       rvals[0] = bs*(baij->rstart + i);
875       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
876       for (j=ai[i]; j<ai[i+1]; j++) {
877         col = baij->garray[aj[j]]*bs;
878         for (k=0; k<bs; k++) {
879           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
880           col++; a += bs;
881         }
882       }
883     }
884     ierr = PetscFree(rvals);CHKERRQ(ierr);
885     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
886     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     /*
888        Everyone has to call to draw the matrix since the graphics waits are
889        synchronized across all processors that share the PetscDraw object
890     */
891     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
892     if (!rank) {
893       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
894       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
895     }
896     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
897     ierr = MatDestroy(A);CHKERRQ(ierr);
898   }
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatView_MPISBAIJ"
904 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
905 {
906   int        ierr;
907   PetscTruth isascii,isdraw,issocket,isbinary;
908 
909   PetscFunctionBegin;
910   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
911   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
912   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
913   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
914   if (isascii || isdraw || issocket || isbinary) {
915     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
916   } else {
917     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
918   }
919   PetscFunctionReturn(0);
920 }
921 
922 #undef __FUNCT__
923 #define __FUNCT__ "MatDestroy_MPISBAIJ"
924 int MatDestroy_MPISBAIJ(Mat mat)
925 {
926   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
927   int         ierr;
928 
929   PetscFunctionBegin;
930 #if defined(PETSC_USE_LOG)
931   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
932 #endif
933   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
934   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
935   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
936   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
937   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
938 #if defined (PETSC_USE_CTABLE)
939   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
940 #else
941   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
942 #endif
943   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
944   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
945   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
946   if (baij->slvec0) {
947     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
948     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
949   }
950   if (baij->slvec1) {
951     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
952     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
953     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
954   }
955   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
956   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
957   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
958   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
959 #if defined(PETSC_USE_MAT_SINGLE)
960   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
961 #endif
962   ierr = PetscFree(baij);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
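/*
   MatMult for the symmetric format (an informal sketch of the steps below):
   only the upper triangular blocks are stored, so besides the diagonal-block
   product slvec1a = A_diag*x, the routine forms B^T*x (the contribution of
   the missing lower triangle), combines the pieces through the sMvctx
   scatter with ADD_VALUES, and finally adds the upper-triangle part with
   yy = B*slvec1b + slvec1a.
*/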
966 #undef __FUNCT__
967 #define __FUNCT__ "MatMult_MPISBAIJ"
968 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
969 {
970   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
971   int         ierr,nt,mbs=a->mbs,bs=a->bs;
972   PetscScalar *x,*from,zero=0.0;
973 
974   PetscFunctionBegin;
975   /*
976   PetscSynchronizedPrintf(PETSC_COMM_WORLD," _1comm is called ...\n");
977   PetscSynchronizedFlush(PETSC_COMM_WORLD);
978   */
979   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
980   if (nt != A->n) {
981     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
982   }
983   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
984   if (nt != A->m) {
985     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
986   }
987 
988   /* diagonal part */
989   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
990   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
991 
992   /* subdiagonal part */
993   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
994 
995   /* copy x into the vec slvec0 */
996   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
997   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
998   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
999   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1000 
1001   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1002   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1003   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1004 
1005   /* superdiagonal part */
1006   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1007 
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1013 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1014 {
1015   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1016   int         ierr,nt;
1017 
1018   PetscFunctionBegin;
1019   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1020   if (nt != A->n) {
1021     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1022   }
1023   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1024   if (nt != A->m) {
1025     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1026   }
1027 
1028   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1029   /* do diagonal part */
1030   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1031   /* do superdiagonal part */
1032   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1033   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1034   /* do subdiagonal part */
1035   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1036   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1037   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1038 
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1044 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1045 {
1046   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1047   int          ierr,mbs=a->mbs,bs=a->bs;
1048   PetscScalar  *x,*from,zero=0.0;
1049 
1050   PetscFunctionBegin;
1051   /*
1052   PetscSynchronizedPrintf(PETSC_COMM_WORLD," MatMultAdd is called ...\n");
1053   PetscSynchronizedFlush(PETSC_COMM_WORLD);
1054   */
1055   /* diagonal part */
1056   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1057   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1058 
1059   /* subdiagonal part */
1060   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1061 
1062   /* copy x into the vec slvec0 */
1063   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1064   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1065   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1066   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1067 
1068   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1069   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1070   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1071 
1072   /* superdiagonal part */
1073   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1074 
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1080 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1081 {
1082   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1083   int        ierr;
1084 
1085   PetscFunctionBegin;
1086   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1087   /* do diagonal part */
1088   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1089   /* do superdiagonal part */
1090   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1091   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1092 
1093   /* do subdiagonal part */
1094   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1095   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1096   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1097 
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1103 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1104 {
1105   PetscFunctionBegin;
1106   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
1107   /* PetscFunctionReturn(0); */
1108 }
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1112 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1113 {
1114   PetscFunctionBegin;
1115   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
1116   /* PetscFunctionReturn(0); */
1117 }
1118 
1119 /*
1120   This only works correctly for square matrices where the subblock A->A is the
1121    diagonal block
1122 */
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1125 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1126 {
1127   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1128   int         ierr;
1129 
1130   PetscFunctionBegin;
1131   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1132   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatScale_MPISBAIJ"
1138 int MatScale_MPISBAIJ(PetscScalar *aa,Mat A)
1139 {
1140   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1141   int         ierr;
1142 
1143   PetscFunctionBegin;
1144   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1145   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
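/*
   MatGetRow assembles one global row from the two local blocks: the B
   entries whose global block column lies to the left of the diagonal block
   come first, then the A (diagonal block) entries, then the remaining B
   entries; global column indices are rebuilt from garray for B and from
   cstart for A.
*/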
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1151 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1152 {
1153   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1154   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1155   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1156   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1157   int            *cmap,*idx_p,cstart = mat->cstart;
1158 
1159   PetscFunctionBegin;
1160   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1161   mat->getrowactive = PETSC_TRUE;
1162 
1163   if (!mat->rowvalues && (idx || v)) {
1164     /*
1165         allocate enough space to hold information from the longest row.
1166     */
1167     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1168     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1169     int     max = 1,mbs = mat->mbs,tmp;
1170     for (i=0; i<mbs; i++) {
1171       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1172       if (max < tmp) { max = tmp; }
1173     }
1174     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1175     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1176   }
1177 
1178   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1179   lrow = row - brstart;  /* local row index */
1180 
1181   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1182   if (!v)   {pvA = 0; pvB = 0;}
1183   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1184   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1185   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1186   nztot = nzA + nzB;
1187 
1188   cmap  = mat->garray;
1189   if (v  || idx) {
1190     if (nztot) {
1191       /* Sort by increasing column numbers, assuming A and B already sorted */
1192       int imark = -1;
1193       if (v) {
1194         *v = v_p = mat->rowvalues;
1195         for (i=0; i<nzB; i++) {
1196           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1197           else break;
1198         }
1199         imark = i;
1200         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1201         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1202       }
1203       if (idx) {
1204         *idx = idx_p = mat->rowindices;
1205         if (imark > -1) {
1206           for (i=0; i<imark; i++) {
1207             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1208           }
1209         } else {
1210           for (i=0; i<nzB; i++) {
1211             if (cmap[cworkB[i]/bs] < cstart)
1212               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1213             else break;
1214           }
1215           imark = i;
1216         }
1217         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1218         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1219       }
1220     } else {
1221       if (idx) *idx = 0;
1222       if (v)   *v   = 0;
1223     }
1224   }
1225   *nz = nztot;
1226   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1227   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1233 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1234 {
1235   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1236 
1237   PetscFunctionBegin;
1238   if (baij->getrowactive == PETSC_FALSE) {
1239     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1240   }
1241   baij->getrowactive = PETSC_FALSE;
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1247 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1248 {
1249   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1250 
1251   PetscFunctionBegin;
1252   *bs = baij->bs;
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1258 int MatZeroEntries_MPISBAIJ(Mat A)
1259 {
1260   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1261   int         ierr;
1262 
1263   PetscFunctionBegin;
1264   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1265   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1271 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1272 {
1273   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1274   Mat         A = a->A,B = a->B;
1275   int         ierr;
1276   PetscReal   isend[5],irecv[5];
1277 
1278   PetscFunctionBegin;
1279   info->block_size     = (PetscReal)a->bs;
1280   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1281   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1282   isend[3] = info->memory;  isend[4] = info->mallocs;
1283   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1284   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1285   isend[3] += info->memory;  isend[4] += info->mallocs;
1286   if (flag == MAT_LOCAL) {
1287     info->nz_used      = isend[0];
1288     info->nz_allocated = isend[1];
1289     info->nz_unneeded  = isend[2];
1290     info->memory       = isend[3];
1291     info->mallocs      = isend[4];
1292   } else if (flag == MAT_GLOBAL_MAX) {
1293     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1294     info->nz_used      = irecv[0];
1295     info->nz_allocated = irecv[1];
1296     info->nz_unneeded  = irecv[2];
1297     info->memory       = irecv[3];
1298     info->mallocs      = irecv[4];
1299   } else if (flag == MAT_GLOBAL_SUM) {
1300     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1301     info->nz_used      = irecv[0];
1302     info->nz_allocated = irecv[1];
1303     info->nz_unneeded  = irecv[2];
1304     info->memory       = irecv[3];
1305     info->mallocs      = irecv[4];
1306   } else {
1307     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1308   }
1309   info->rows_global       = (PetscReal)matin->M;
1310   info->columns_global    = (PetscReal)matin->N;
1311   info->rows_local        = (PetscReal)matin->m;
1312   info->columns_local     = (PetscReal)matin->n;
1313   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1314   info->fill_ratio_needed = 0;
1315   info->factor_mallocs    = 0;
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1321 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1322 {
1323   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1324   int         ierr;
1325 
1326   PetscFunctionBegin;
1327   switch (op) {
1328   case MAT_NO_NEW_NONZERO_LOCATIONS:
1329   case MAT_YES_NEW_NONZERO_LOCATIONS:
1330   case MAT_COLUMNS_UNSORTED:
1331   case MAT_COLUMNS_SORTED:
1332   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1333   case MAT_KEEP_ZEROED_ROWS:
1334   case MAT_NEW_NONZERO_LOCATION_ERR:
1335     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1336     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1337     break;
1338   case MAT_ROW_ORIENTED:
1339     a->roworiented = PETSC_TRUE;
1340     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1341     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1342     break;
1343   case MAT_ROWS_SORTED:
1344   case MAT_ROWS_UNSORTED:
1345   case MAT_YES_NEW_DIAGONALS:
1346   case MAT_USE_SINGLE_PRECISION_SOLVES:
1347     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1348     break;
1349   case MAT_COLUMN_ORIENTED:
1350     a->roworiented = PETSC_FALSE;
1351     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1352     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1353     break;
1354   case MAT_IGNORE_OFF_PROC_ENTRIES:
1355     a->donotstash = PETSC_TRUE;
1356     break;
1357   case MAT_NO_NEW_DIAGONALS:
1358     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1359   case MAT_USE_HASH_TABLE:
1360     a->ht_flag = PETSC_TRUE;
1361     break;
1362   default:
1363     SETERRQ(PETSC_ERR_SUP,"unknown option");
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1370 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1371 {
1372   int ierr;
1373   PetscFunctionBegin;
1374   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
1377 
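/*
   For the symmetric format only the symmetric scaling D*A*D is supported,
   hence the requirement below that the left and right scaling vectors be
   the same vector.
*/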
1378 #undef __FUNCT__
1379 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1380 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1381 {
1382   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1383   Mat         a = baij->A,b = baij->B;
1384   int         ierr,s1,s2,s3;
1385 
1386   PetscFunctionBegin;
1387   if (ll != rr) {
1388     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1389   }
1390   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1391   if (rr) {
1392     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1393     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1394     /* Overlap communication with computation. */
1395     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1396 
1397     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1398     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1399     /* left scale the off-diagonal block */
1400     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1401 
1402     /* scale the diagonal block */
1403     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1404 
1405     /* finish the scatter, then right scale the off-diagonal block */
1406     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1407     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1408   }
1409 
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1415 int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag)
1416 {
1417   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1418   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1419   int            *procs,*nprocs,j,idx,nsends,*work,row;
1420   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1421   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1422   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1423   MPI_Comm       comm = A->comm;
1424   MPI_Request    *send_waits,*recv_waits;
1425   MPI_Status     recv_status,*send_status;
1426   IS             istmp;
1427   PetscTruth     found;
1428 
1429   PetscFunctionBegin;
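  /* The rows listed in 'is' may belong to any process: determine the owner of each row,
     exchange message counts and lengths, ship the row indices to their owners, and then
     zero the locally owned rows that arrive. */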
1430   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1431   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1432 
1433   /*  first count number of contributors to each processor */
1434   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
1435   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1436   procs = nprocs + size;
1437   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
1438   for (i=0; i<N; i++) {
1439     idx   = rows[i];
1440     found = PETSC_FALSE;
1441     for (j=0; j<size; j++) {
1442       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1443         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
1444       }
1445     }
1446     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1447   }
1448   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1449 
1450   /* inform other processors of number of messages and max length*/
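  /* nprocs[j] is the number of rows this process sends to process j and procs[j] is 1 if
     anything is sent there at all; the PetscMaxSum_Op reduction is arranged so that
     work[rank] becomes the longest message this process will receive and work[size+rank]
     the number of processes that will send to it */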
1451   ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
1452   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1453   nmax   = work[rank];
1454   nrecvs = work[size+rank];
1455   ierr   = PetscFree(work);CHKERRQ(ierr);
1456 
1457   /* post receives:   */
1458   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
1459   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1460   for (i=0; i<nrecvs; i++) {
1461     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1462   }
1463 
1464   /* do sends:
1465      1) starts[i] gives the starting index in svalues for stuff going to
1466      the ith processor
1467   */
1468   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
1469   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1470   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
1471   starts[0]  = 0;
1472   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1473   for (i=0; i<N; i++) {
1474     svalues[starts[owner[i]]++] = rows[i];
1475   }
1476   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1477 
1478   starts[0] = 0;
1479   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1480   count = 0;
1481   for (i=0; i<size; i++) {
1482     if (procs[i]) {
1483       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1484     }
1485   }
1486   ierr = PetscFree(starts);CHKERRQ(ierr);
1487 
1488   base = owners[rank]*bs;
1489 
1490   /*  wait on receives */
1491   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
1492   source = lens + nrecvs;
1493   count  = nrecvs; slen = 0;
1494   while (count) {
1495     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1496     /* unpack receives into our local space */
1497     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1498     source[imdex]  = recv_status.MPI_SOURCE;
1499     lens[imdex]    = n;
1500     slen          += n;
1501     count--;
1502   }
1503   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1504 
1505   /* move the data into the send scatter */
1506   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
1507   count = 0;
1508   for (i=0; i<nrecvs; i++) {
1509     values = rvalues + i*nmax;
1510     for (j=0; j<lens[i]; j++) {
1511       lrows[count++] = values[j] - base;
1512     }
1513   }
1514   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1515   ierr = PetscFree(lens);CHKERRQ(ierr);
1516   ierr = PetscFree(owner);CHKERRQ(ierr);
1517   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1518 
1519   /* actually zap the local rows */
1520   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1521   PetscLogObjectParent(A,istmp);
1522 
1523   /*
1524         Zero the required rows. If the "diagonal block" of the matrix
1525      is square and the user wishes to set the diagonal we use separate
1526      code so that MatSetValues() is not called once per diagonal entry,
1527      which would allocate new memory repeatedly and slow things down.
1528 
1529        Contributed by: Mathew Knepley
1530   */
1531   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1532   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1533   if (diag && (l->A->M == l->A->N)) {
1534     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1535   } else if (diag) {
1536     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1537     if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
1538       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1539 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1540     }
1541     for (i=0; i<slen; i++) {
1542       row  = lrows[i] + rstart_bs;
1543       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1544     }
1545     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1546     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1547   } else {
1548     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1549   }
1550 
1551   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1552   ierr = PetscFree(lrows);CHKERRQ(ierr);
1553 
1554   /* wait on sends */
1555   if (nsends) {
1556     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1557     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1558     ierr = PetscFree(send_status);CHKERRQ(ierr);
1559   }
1560   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1561   ierr = PetscFree(svalues);CHKERRQ(ierr);
1562 
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1568 int MatPrintHelp_MPISBAIJ(Mat A)
1569 {
1570   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1571   MPI_Comm    comm = A->comm;
1572   static int  called = 0;
1573   int         ierr;
1574 
1575   PetscFunctionBegin;
1576   if (!a->rank) {
1577     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1578   }
1579   if (called) {PetscFunctionReturn(0);} else called = 1;
1580   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1581   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1587 int MatSetUnfactored_MPISBAIJ(Mat A)
1588 {
1589   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1590   int         ierr;
1591 
1592   PetscFunctionBegin;
1593   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1598 
1599 #undef __FUNCT__
1600 #define __FUNCT__ "MatEqual_MPISBAIJ"
1601 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1602 {
1603   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1604   Mat         a,b,c,d;
1605   PetscTruth  flg;
1606   int         ierr;
1607 
1608   PetscFunctionBegin;
1609   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr);
1610   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1611   a = matA->A; b = matA->B;
1612   c = matB->A; d = matB->B;
1613 
1614   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1615   if (flg == PETSC_TRUE) {
1616     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1617   }
1618   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 #undef __FUNCT__
1623 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1624 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1625 {
1626   int        ierr;
1627 
1628   PetscFunctionBegin;
1629   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1630   PetscFunctionReturn(0);
1631 }
1632 /* -------------------------------------------------------------------*/
1633 static struct _MatOps MatOps_Values = {
1634   MatSetValues_MPISBAIJ,
1635   MatGetRow_MPISBAIJ,
1636   MatRestoreRow_MPISBAIJ,
1637   MatMult_MPISBAIJ,
1638   MatMultAdd_MPISBAIJ,
1639   MatMultTranspose_MPISBAIJ,
1640   MatMultTransposeAdd_MPISBAIJ,
1641   0,
1642   0,
1643   0,
1644   0,
1645   0,
1646   0,
1647   MatRelax_MPISBAIJ,
1648   MatTranspose_MPISBAIJ,
1649   MatGetInfo_MPISBAIJ,
1650   MatEqual_MPISBAIJ,
1651   MatGetDiagonal_MPISBAIJ,
1652   MatDiagonalScale_MPISBAIJ,
1653   MatNorm_MPISBAIJ,
1654   MatAssemblyBegin_MPISBAIJ,
1655   MatAssemblyEnd_MPISBAIJ,
1656   0,
1657   MatSetOption_MPISBAIJ,
1658   MatZeroEntries_MPISBAIJ,
1659   MatZeroRows_MPISBAIJ,
1660   0,
1661   0,
1662   0,
1663   0,
1664   MatSetUpPreallocation_MPISBAIJ,
1665   0,
1666   0,
1667   0,
1668   0,
1669   MatDuplicate_MPISBAIJ,
1670   0,
1671   0,
1672   0,
1673   0,
1674   0,
1675   MatGetSubMatrices_MPISBAIJ,
1676   MatIncreaseOverlap_MPISBAIJ,
1677   MatGetValues_MPISBAIJ,
1678   0,
1679   MatPrintHelp_MPISBAIJ,
1680   MatScale_MPISBAIJ,
1681   0,
1682   0,
1683   0,
1684   MatGetBlockSize_MPISBAIJ,
1685   0,
1686   0,
1687   0,
1688   0,
1689   0,
1690   0,
1691   MatSetUnfactored_MPISBAIJ,
1692   0,
1693   MatSetValuesBlocked_MPISBAIJ,
1694   0,
1695   0,
1696   0,
1697   MatGetPetscMaps_Petsc,
1698   0,
1699   0,
1700   0,
1701   0,
1702   0,
1703   0,
1704   MatGetRowMax_MPISBAIJ};
1705 
1706 
1707 EXTERN_C_BEGIN
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1710 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1711 {
1712   PetscFunctionBegin;
1713   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1714   *iscopy = PETSC_FALSE;
1715   PetscFunctionReturn(0);
1716 }
1717 EXTERN_C_END
1718 
1719 EXTERN_C_BEGIN
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatCreate_MPISBAIJ"
1722 int MatCreate_MPISBAIJ(Mat B)
1723 {
1724   Mat_MPISBAIJ *b;
1725   int          ierr;
1726   PetscTruth   flg;
1727 
1728   PetscFunctionBegin;
1729 
1730   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1731   B->data = (void*)b;
1732   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1733   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1734 
1735   B->ops->destroy    = MatDestroy_MPISBAIJ;
1736   B->ops->view       = MatView_MPISBAIJ;
1737   B->mapping    = 0;
1738   B->factor     = 0;
1739   B->assembled  = PETSC_FALSE;
1740 
1741   B->insertmode = NOT_SET_VALUES;
1742   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1743   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1744 
1745   /* build local table of row and column ownerships */
1746   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1747   b->cowners    = b->rowners + b->size + 2;
1748   b->rowners_bs = b->cowners + b->size + 2;
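  /* rowners, cowners and rowners_bs share the single allocation above; the actual
     ownership ranges are filled in later, once MatMPISBAIJSetPreallocation() has fixed
     the local sizes */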
1749   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1750 
1751   /* build cache for off array entries formed */
1752   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1753   b->donotstash  = PETSC_FALSE;
1754   b->colmap      = PETSC_NULL;
1755   b->garray      = PETSC_NULL;
1756   b->roworiented = PETSC_TRUE;
1757 
1758 #if defined(PETSC_USE_MAT_SINGLE)
1759   /* stuff for MatSetValues_XXX in single precision */
1760   b->setvalueslen     = 0;
1761   b->setvaluescopy    = PETSC_NULL;
1762 #endif
1763 
1764   /* stuff used in block assembly */
1765   b->barray       = 0;
1766 
1767   /* stuff used for matrix vector multiply */
1768   b->lvec         = 0;
1769   b->Mvctx        = 0;
1770   b->slvec0       = 0;
1771   b->slvec0b      = 0;
1772   b->slvec1       = 0;
1773   b->slvec1a      = 0;
1774   b->slvec1b      = 0;
1775   b->sMvctx       = 0;
1776 
1777   /* stuff for MatGetRow() */
1778   b->rowindices   = 0;
1779   b->rowvalues    = 0;
1780   b->getrowactive = PETSC_FALSE;
1781 
1782   /* hash table stuff */
1783   b->ht           = 0;
1784   b->hd           = 0;
1785   b->ht_size      = 0;
1786   b->ht_flag      = PETSC_FALSE;
1787   b->ht_fact      = 0;
1788   b->ht_total_ct  = 0;
1789   b->ht_insert_ct = 0;
1790 
1791   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1792   if (flg) {
1793     PetscReal fact = 1.39;
1794     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1795     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1796     if (fact <= 1.0) fact = 1.39;
1797     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1798     PetscLogInfo(0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact);
1799   }
1800   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1801                                      "MatStoreValues_MPISBAIJ",
1802                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1803   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1804                                      "MatRetrieveValues_MPISBAIJ",
1805                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1806   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1807                                      "MatGetDiagonalBlock_MPISBAIJ",
1808                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 EXTERN_C_END
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1815 /*@C
1816    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1817    the user should preallocate the matrix storage by setting the parameters
1818    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1819    performance can be increased by more than a factor of 50.
1820 
1821    Collective on Mat
1822 
1823    Input Parameters:
1824 +  A - the matrix
1825 .  bs   - size of block
1826 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1827            submatrix  (same for all local rows)
1828 .  d_nnz - array containing the number of block nonzeros in the various block rows
1829            in the upper triangular and diagonal part of the diagonal portion of the local
1830            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1831            for the diagonal entry even if it is zero.
1832 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1833            submatrix (same for all local rows).
1834 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1835            off-diagonal portion of the local submatrix (possibly different for
1836            each block row) or PETSC_NULL.
1837 
1838 
1839    Options Database Keys:
1840 .   -mat_no_unroll - uses code that does not unroll the loops in the
1841                      block calculations (much slower)
1842 .   -mat_block_size - size of the blocks to use
1843 
1844    Notes:
1845 
1846    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1847    then it must be used on all processors that share the object for that argument.
1848 
1849    Storage Information:
1850    For a square global matrix we define each processor's diagonal portion
1851    to be its local rows and the corresponding columns (a square submatrix);
1852    each processor's off-diagonal portion encompasses the remainder of the
1853    local matrix (a rectangular submatrix).
1854 
1855    The user can specify preallocated storage for the diagonal part of
1856    the local submatrix with either d_nz or d_nnz (not both).  Set
1857    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1858    memory allocation.  Likewise, specify preallocated storage for the
1859    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1860 
1861    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1862    the figure below we depict these three local rows and all columns (0-11).
1863 
1864 .vb
1865            0 1 2 3 4 5 6 7 8 9 10 11
1866           -------------------
1867    row 3  |  o o o d d d o o o o o o
1868    row 4  |  o o o d d d o o o o o o
1869    row 5  |  o o o d d d o o o o o o
1870           -------------------
1871 .ve
1872 
1873    Thus, any entries in the d locations are stored in the d (diagonal)
1874    submatrix, and any entries in the o locations are stored in the
1875    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1876    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1877 
1878    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1879    plus the diagonal part of the d matrix,
1880    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1881    In general, for PDE problems in which most nonzeros are near the diagonal,
1882    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1883    or you will get TERRIBLE performance; see the users' manual chapter on
1884    matrices.
1885 
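   Example usage (a rough sketch only; comm, the global size M and ierr are assumed to be
   provided by the caller, and M is assumed divisible by the block size):

.vb
   Mat A;
   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,M,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   /* bs=2 blocks; at most 3 block nonzeros per block row in the diagonal part
      (upper triangle plus diagonal) and 1 in the off-diagonal part */
   ierr = MatMPISBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve
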
1886    Level: intermediate
1887 
1888 .keywords: matrix, block, aij, compressed row, sparse, parallel
1889 
1890 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1891 @*/
1892 
1893 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1894 {
1895   Mat_MPISBAIJ *b;
1896   int          ierr,i,mbs,Mbs;
1897   PetscTruth   flg2;
1898 
1899   PetscFunctionBegin;
1900   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr);
1901   if (!flg2) PetscFunctionReturn(0);
1902 
1903   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1904 
1905   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1906   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1907   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1908   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1909   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1910   if (d_nnz) {
1911     for (i=0; i<B->m/bs; i++) {
1912       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1913     }
1914   }
1915   if (o_nnz) {
1916     for (i=0; i<B->m/bs; i++) {
1917       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1918     }
1919   }
1920   B->preallocated = PETSC_TRUE;
1921   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1922   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1923   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1924   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1925 
1926   b   = (Mat_MPISBAIJ*)B->data;
1927   mbs = B->m/bs;
1928   Mbs = B->M/bs;
1929   if (mbs*bs != B->m) {
1930     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by block size %d",B->m,bs);
1931   }
1932 
1933   b->bs  = bs;
1934   b->bs2 = bs*bs;
1935   b->mbs = mbs;
1936   b->nbs = mbs;
1937   b->Mbs = Mbs;
1938   b->Nbs = Mbs;
1939 
1940   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1941   b->rowners[0]    = 0;
1942   for (i=2; i<=b->size; i++) {
1943     b->rowners[i] += b->rowners[i-1];
1944   }
1945   b->rstart    = b->rowners[b->rank];
1946   b->rend      = b->rowners[b->rank+1];
1947   b->cstart    = b->rstart;
1948   b->cend      = b->rend;
1949   for (i=0; i<=b->size; i++) {
1950     b->rowners_bs[i] = b->rowners[i]*bs;
1951   }
1952   b->rstart_bs = b->rstart*bs;
1953   b->rend_bs   = b->rend*bs;
1954 
1955   b->cstart_bs = b->cstart*bs;
1956   b->cend_bs   = b->cend*bs;
1957 
1958 
1959   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1960   PetscLogObjectParent(B,b->A);
1961   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1962   PetscLogObjectParent(B,b->B);
1963 
1964   /* build cache for off array entries formed */
1965   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1966 
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatCreateMPISBAIJ"
1972 /*@C
1973    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1974    (block compressed row).  For good matrix assembly performance
1975    the user should preallocate the matrix storage by setting the parameters
1976    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1977    performance can be increased by more than a factor of 50.
1978 
1979    Collective on MPI_Comm
1980 
1981    Input Parameters:
1982 +  comm - MPI communicator
1983 .  bs   - size of block
1984 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1985            This value should be the same as the local size used in creating the
1986            y vector for the matrix-vector product y = Ax.
1987 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1988            This value should be the same as the local size used in creating the
1989            x vector for the matrix-vector product y = Ax.
1990 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1991 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1992 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1993            submatrix  (same for all local rows)
1994 .  d_nnz - array containing the number of block nonzeros in the various block rows
1995            in the upper triangular and diagonal part of the diagonal portion of the local
1996            submatrix (possibly different for each block row) or PETSC_NULL.
1997            You must leave room for the diagonal entry even if it is zero.
1998 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1999            submatrix (same for all local rows).
2000 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2001            off-diagonal portion of the local submatrix (possibly different for
2002            each block row) or PETSC_NULL.
2003 
2004    Output Parameter:
2005 .  A - the matrix
2006 
2007    Options Database Keys:
2008 .   -mat_no_unroll - uses code that does not unroll the loops in the
2009                      block calculations (much slower)
2010 .   -mat_block_size - size of the blocks to use
2011 .   -mat_mpi - use the parallel matrix data structures even on one processor
2012                (defaults to using SeqSBAIJ format on one processor)
2013 
2014    Notes:
2015    The user MUST specify either the local or global matrix dimensions
2016    (possibly both).
2017 
2018    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2019    then it must be used on all processors that share the object for that argument.
2020 
2021    Storage Information:
2022    For a square global matrix we define each processor's diagonal portion
2023    to be its local rows and the corresponding columns (a square submatrix);
2024    each processor's off-diagonal portion encompasses the remainder of the
2025    local matrix (a rectangular submatrix).
2026 
2027    The user can specify preallocated storage for the diagonal part of
2028    the local submatrix with either d_nz or d_nnz (not both).  Set
2029    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2030    memory allocation.  Likewise, specify preallocated storage for the
2031    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2032 
2033    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2034    the figure below we depict these three local rows and all columns (0-11).
2035 
2036 .vb
2037            0 1 2 3 4 5 6 7 8 9 10 11
2038           -------------------
2039    row 3  |  o o o d d d o o o o o o
2040    row 4  |  o o o d d d o o o o o o
2041    row 5  |  o o o d d d o o o o o o
2042           -------------------
2043 .ve
2044 
2045    Thus, any entries in the d locations are stored in the d (diagonal)
2046    submatrix, and any entries in the o locations are stored in the
2047    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2048    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2049 
2050    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2051    plus the diagonal part of the d matrix,
2052    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2053    In general, for PDE problems in which most nonzeros are near the diagonal,
2054    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2055    or you will get TERRIBLE performance; see the users' manual chapter on
2056    matrices.
2057 
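   Example usage (a rough sketch only; comm, the global size M and ierr are assumed to be
   provided by the caller, and M is assumed divisible by the block size):

.vb
   Mat A;
   ierr = MatCreateMPISBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                            3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
   /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual ... */
.ve
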
2058    Level: intermediate
2059 
2060 .keywords: matrix, block, aij, compressed row, sparse, parallel
2061 
2062 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2063 @*/
2064 
2065 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2066 {
2067   int ierr,size;
2068 
2069   PetscFunctionBegin;
2070   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2071   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2072   if (size > 1) {
2073     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2074     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2075   } else {
2076     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2077     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2078   }
2079   PetscFunctionReturn(0);
2080 }
2081 
2082 
2083 #undef __FUNCT__
2084 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2085 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2086 {
2087   Mat          mat;
2088   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2089   int          ierr,len=0;
2090 
2091   PetscFunctionBegin;
2092   *newmat       = 0;
2093   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2094   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
2095   mat->preallocated = PETSC_TRUE;
2096   a = (Mat_MPISBAIJ*)mat->data;
2097   a->bs  = oldmat->bs;
2098   a->bs2 = oldmat->bs2;
2099   a->mbs = oldmat->mbs;
2100   a->nbs = oldmat->nbs;
2101   a->Mbs = oldmat->Mbs;
2102   a->Nbs = oldmat->Nbs;
2103 
2104   a->rstart       = oldmat->rstart;
2105   a->rend         = oldmat->rend;
2106   a->cstart       = oldmat->cstart;
2107   a->cend         = oldmat->cend;
2108   a->size         = oldmat->size;
2109   a->rank         = oldmat->rank;
2110   a->donotstash   = oldmat->donotstash;
2111   a->roworiented  = oldmat->roworiented;
2112   a->rowindices   = 0;
2113   a->rowvalues    = 0;
2114   a->getrowactive = PETSC_FALSE;
2115   a->barray       = 0;
2116   a->rstart_bs    = oldmat->rstart_bs;
2117   a->rend_bs      = oldmat->rend_bs;
2118   a->cstart_bs    = oldmat->cstart_bs;
2119   a->cend_bs      = oldmat->cend_bs;
2120 
2121   /* hash table stuff */
2122   a->ht           = 0;
2123   a->hd           = 0;
2124   a->ht_size      = 0;
2125   a->ht_flag      = oldmat->ht_flag;
2126   a->ht_fact      = oldmat->ht_fact;
2127   a->ht_total_ct  = 0;
2128   a->ht_insert_ct = 0;
2129 
2130   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
2131   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2132   a->cowners    = a->rowners + a->size + 2;
2133   a->rowners_bs = a->cowners + a->size + 2;
2134   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2135   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2136   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2137   if (oldmat->colmap) {
2138 #if defined (PETSC_USE_CTABLE)
2139     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2140 #else
2141     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2142     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2143     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2144 #endif
2145   } else a->colmap = 0;
2146   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2147     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2148     PetscLogObjectMemory(mat,len*sizeof(int));
2149     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2150   } else a->garray = 0;
2151 
2152   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2153   PetscLogObjectParent(mat,a->lvec);
2154   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2155 
2156   PetscLogObjectParent(mat,a->Mvctx);
2157   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2158   PetscLogObjectParent(mat,a->A);
2159   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2160   PetscLogObjectParent(mat,a->B);
2161   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2162   *newmat = mat;
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 #include "petscsys.h"
2167 
2168 EXTERN_C_BEGIN
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatLoad_MPISBAIJ"
2171 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
2172 {
2173   Mat          A;
2174   int          i,nz,ierr,j,rstart,rend,fd;
2175   PetscScalar  *vals,*buf;
2176   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2177   MPI_Status   status;
2178   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2179   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2180   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2181   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2182   int          dcount,kmax,k,nzcount,tmp;
2183 
2184   PetscFunctionBegin;
2185   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2186 
2187   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2188   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2189   if (!rank) {
2190     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2191     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2192     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2193     if (header[3] < 0) {
2194       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2195     }
2196   }
2197 
2198   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2199   M = header[1]; N = header[2];
2200 
2201   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2202 
2203   /*
2204      This code adds extra rows to make sure the number of rows is
2205      divisible by the blocksize
2206   */
2207   Mbs        = M/bs;
2208   extra_rows = bs - M + bs*(Mbs);
2209   if (extra_rows == bs) extra_rows = 0;
2210   else                  Mbs++;
2211   if (extra_rows && !rank) {
2212     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2213   }
2214 
2215   /* determine ownership of all rows */
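  /* split the Mbs block rows as evenly as possible: every process gets Mbs/size of them
     and the first Mbs%size processes get one extra */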
2216   mbs        = Mbs/size + ((Mbs % size) > rank);
2217   m          = mbs*bs;
2218   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2219   browners   = rowners + size + 1;
2220   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2221   rowners[0] = 0;
2222   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2223   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2224   rstart = rowners[rank];
2225   rend   = rowners[rank+1];
2226 
2227   /* distribute row lengths to all processors */
2228   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2229   if (!rank) {
2230     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2231     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2232     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2233     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2234     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2235     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2236     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2237   } else {
2238     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2239   }
2240 
2241   if (!rank) {   /* procs[0] */
2242     /* calculate the number of nonzeros on each processor */
2243     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2244     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2245     for (i=0; i<size; i++) {
2246       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2247         procsnz[i] += rowlengths[j];
2248       }
2249     }
2250     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2251 
2252     /* determine max buffer needed and allocate it */
2253     maxnz = 0;
2254     for (i=0; i<size; i++) {
2255       maxnz = PetscMax(maxnz,procsnz[i]);
2256     }
2257     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2258 
2259     /* read in my part of the matrix column indices  */
2260     nz     = procsnz[0];
2261     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2262     mycols = ibuf;
2263     if (size == 1)  nz -= extra_rows;
2264     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2265     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2266 
2267     /* read in each of the other processes' parts (except the last) and ship them off */
2268     for (i=1; i<size-1; i++) {
2269       nz   = procsnz[i];
2270       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2271       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2272     }
2273     /* read in the stuff for the last proc */
2274     if (size != 1) {
2275       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2276       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2277       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2278       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2279     }
2280     ierr = PetscFree(cols);CHKERRQ(ierr);
2281   } else {  /* procs[i], i>0 */
2282     /* determine buffer space needed for message */
2283     nz = 0;
2284     for (i=0; i<m; i++) {
2285       nz += locrowlens[i];
2286     }
2287     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2288     mycols = ibuf;
2289     /* receive message of column indices*/
2290     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2291     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2292     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2293   }
2294 
2295   /* loop over local rows, determining number of off diagonal entries */
2296   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2297   odlens   = dlens + (rend-rstart);
2298   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2299   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2300   masked1  = mask    + Mbs;
2301   masked2  = masked1 + Mbs;
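  /* mask[] flags the block columns already seen in the current block row; masked1/masked2
     record which flags were set for the diagonal/off-diagonal parts so that only those
     entries need to be cleared before moving on to the next block row */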
2302   rowcount = 0; nzcount = 0;
2303   for (i=0; i<mbs; i++) {
2304     dcount  = 0;
2305     odcount = 0;
2306     for (j=0; j<bs; j++) {
2307       kmax = locrowlens[rowcount];
2308       for (k=0; k<kmax; k++) {
2309         tmp = mycols[nzcount++]/bs; /* block col. index */
2310         if (!mask[tmp]) {
2311           mask[tmp] = 1;
2312           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2313           else masked1[dcount++] = tmp; /* entry in diag portion */
2314         }
2315       }
2316       rowcount++;
2317     }
2318 
2319     dlens[i]  = dcount;  /* d_nzz[i] */
2320     odlens[i] = odcount; /* o_nzz[i] */
2321 
2322     /* zero out the mask elements we set */
2323     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2324     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2325   }
2326 
2327   /* create our matrix */
2328   ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat);
2329   CHKERRQ(ierr);
2330   A = *newmat;
2331   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2332 
2333   if (!rank) {
2334     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2335     /* read in my part of the matrix numerical values  */
2336     nz = procsnz[0];
2337     vals = buf;
2338     mycols = ibuf;
2339     if (size == 1)  nz -= extra_rows;
2340     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2341     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2342 
2343     /* insert into matrix */
2344     jj      = rstart*bs;
2345     for (i=0; i<m; i++) {
2346       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2347       mycols += locrowlens[i];
2348       vals   += locrowlens[i];
2349       jj++;
2350     }
2351 
2352     /* read in other processors (except the last one) and ship out */
2353     for (i=1; i<size-1; i++) {
2354       nz   = procsnz[i];
2355       vals = buf;
2356       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2357       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2358     }
2359     /* the last proc */
2360     if (size != 1){
2361       nz   = procsnz[i] - extra_rows;
2362       vals = buf;
2363       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2364       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2365       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2366     }
2367     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2368 
2369   } else {
2370     /* receive numeric values */
2371     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2372 
2373     /* receive message of values*/
2374     vals   = buf;
2375     mycols = ibuf;
2376     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2377     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2378     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2379 
2380     /* insert into matrix */
2381     jj      = rstart*bs;
2382     for (i=0; i<m; i++) {
2383       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2384       mycols += locrowlens[i];
2385       vals   += locrowlens[i];
2386       jj++;
2387     }
2388   }
2389 
2390   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2391   ierr = PetscFree(buf);CHKERRQ(ierr);
2392   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2393   ierr = PetscFree(rowners);CHKERRQ(ierr);
2394   ierr = PetscFree(dlens);CHKERRQ(ierr);
2395   ierr = PetscFree(mask);CHKERRQ(ierr);
2396   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2397   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2398   PetscFunctionReturn(0);
2399 }
2400 EXTERN_C_END
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2404 /*@
2405    MatMPISBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
2406 
2407    Input Parameters:
2408 +  mat  - the matrix
2409 -  fact - factor
2410 
2411    Collective on Mat
2412 
2413    Level: advanced
2414 
2415   Notes:
2416    This can also be set by the command line option: -mat_use_hash_table fact
2417 
2418 .keywords: matrix, hashtable, factor, HT
2419 
2420 .seealso: MatSetOption()
2421 @*/
2422 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2423 {
2424   PetscFunctionBegin;
2425   SETERRQ(1,"Function not yet written for SBAIJ format");
2426   /* PetscFunctionReturn(0); */
2427 }
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2431 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2432 {
2433   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2434   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2435   PetscReal    atmp;
2436   PetscReal    *work,*svalues,*rvalues;
2437   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2438   int          rank,size,*rowners_bs,dest,count,source;
2439   PetscScalar  *va;
2440   MatScalar    *ba;
2441   MPI_Status   stat;
2442 
2443   PetscFunctionBegin;
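  /* Only the upper triangle is stored: the diagonal block provides the local row maxima
     directly, while each entry of the off-diagonal block B also contributes, by symmetry,
     to a row owned by a higher-rank process; those contributions are accumulated in work[]
     and shipped to their owners below. */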
2444   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2445   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2446 
2447   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
2448   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
2449 
2450   bs   = a->bs;
2451   mbs  = a->mbs;
2452   Mbs  = a->Mbs;
2453   ba   = b->a;
2454   bi   = b->i;
2455   bj   = b->j;
2456   /*
2457   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2458   PetscSynchronizedFlush(PETSC_COMM_WORLD);
2459   */
2460 
2461   /* find ownerships */
2462   rowners_bs = a->rowners_bs;
2463   /*
2464   if (!rank){
2465     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2466   }
2467   */
2468 
2469   /* each proc creates an array to be distributed */
2470   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2471   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2472 
2473   /* row_max for B */
2474   if (rank != size-1){
2475     for (i=0; i<mbs; i++) {
2476       ncols = bi[1] - bi[0]; bi++;
2477       brow  = bs*i;
2478       for (j=0; j<ncols; j++){
2479         bcol = bs*(*bj);
2480         for (kcol=0; kcol<bs; kcol++){
2481           col = bcol + kcol;                 /* local col index */
2482           col += rowners_bs[rank+1];      /* global col index */
2483           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2484           for (krow=0; krow<bs; krow++){
2485             atmp = PetscAbsScalar(*ba); ba++;
2486             row = brow + krow;    /* local row index */
2487             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2488             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2489             if (work[col] < atmp) work[col] = atmp;
2490           }
2491         }
2492         bj++;
2493       }
2494     }
2495     /*
2496       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2497       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2498       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2499       */
2500 
2501     /* send values to its owners */
2502     for (dest=rank+1; dest<size; dest++){
2503       svalues = work + rowners_bs[dest];
2504       count   = rowners_bs[dest+1]-rowners_bs[dest];
2505       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PETSC_COMM_WORLD);CHKERRQ(ierr);
2506       /*
2507       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2508       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2509       */
2510     }
2511   }
2512 
2513   /* receive values */
2514   if (rank){
2515     rvalues = work;
2516     count   = rowners_bs[rank+1]-rowners_bs[rank];
2517     for (source=0; source<rank; source++){
2518       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);CHKERRQ(ierr);
2519       /* process values */
2520       for (i=0; i<count; i++){
2521         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2522       }
2523       /*
2524       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2525       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2526       */
2527     }
2528   }
2529 
2530   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2531   ierr = PetscFree(work);CHKERRQ(ierr);
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 #undef __FUNCT__
2536 #define __FUNCT__ "MatRelax_MPISBAIJ"
2537 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2538 {
2539   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2540   int            ierr,mbs=mat->mbs,bs=mat->bs;
2541   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2542   Vec            bb1;
2543 
2544   PetscFunctionBegin;
2545   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2546   if (bs > 1)
2547     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2548 
2549   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2550     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2551       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2552       its--;
2553     }
2554 
2555     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
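    /* Each sweep builds the local right-hand side bb1 = bb - (off-diagonal part of A)*xx
       with a single forward scatter: slvec0 carries [-xx ; -B^T*xx] out, slvec1 returns as
       [bb plus the remote -B^T*xx contributions ; ghost values of -xx], and the multadd
       below then folds in B times those ghost values. */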
2556     while (its--){
2557 
2558       /* lower triangular part: slvec0b = - B^T*xx */
2559       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2560 
2561       /* copy xx into slvec0a */
2562       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2563       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2564       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2565       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2566 
2567       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2568 
2569       /* copy bb into slvec1a */
2570       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2571       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2572       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2573       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2574 
2575       /* set slvec1b = 0 */
2576       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2577 
2578       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2579       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2580       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2581       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2582 
2583       /* upper triangular part: bb1 = slvec1a + B*slvec1b (slvec1b holds the ghost values of -xx) */
2584       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2585 
2586       /* local diagonal sweep */
2587       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2588     }
2589     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2590   } else {
2591     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2592   }
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 #undef __FUNCT__
2597 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2598 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2599 {
2600   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2601   int            ierr;
2602   PetscScalar    mone=-1.0;
2603   Vec            lvec1,bb1;
2604 
2605   PetscFunctionBegin;
2606   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2607   if (mat->bs > 1)
2608     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2609 
2610   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2611     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2612       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2613       its--;
2614     }
2615 
2616     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2617     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
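    /* Two-communication variant: a forward scatter of xx through Mvctx supplies the ghost
       values needed for the upper-triangular product, while a reverse scatter with
       ADD_VALUES returns the local -B^T*xx contributions to the rows that own them. */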
2618     while (its--){
2619       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2620 
2621       /* lower triangular part: bb1 = bb - B^T*xx */
2622       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2623       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2624 
2625       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2626       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2627       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2628 
2629       /* upper triangular part: bb1 = bb1 - B*x */
2630       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2631       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2632 
2633       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2634 
2635       /* diagonal sweep */
2636       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2637     }
2638     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2639     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2640   } else {
2641     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2642   }
2643   PetscFunctionReturn(0);
2644 }
2645 
2646