xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision f7e0d671db41bd0b4d3ca67f19598abe511c270a)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int DisAssemble_MPISBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *);
13 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatPrintHelp_SeqSBAIJ(Mat);
18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
21 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
26    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
27    converts the entries into single precision and then calls ..._MatScalar() to put them
28    into the single precision data structures.
29 */
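/*
   In outline (when PETSC_USE_MAT_SINGLE is defined), each wrapper defined later in
   this file simply copies the user's PetscScalar values into a cached MatScalar
   buffer and delegates, roughly:

     for (i=0; i<N; i++) vsingle[i] = v[i];
     ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);

   where vsingle points at b->setvaluescopy, grown on demand to length b->setvalueslen.
*/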
30 #if defined(PETSC_USE_MAT_SINGLE)
31 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
32 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
33 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
34 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
35 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 int MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
50   int          ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 int MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
65   int          ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 /*
75      Local utility routine that creates a mapping from the global column
76    number to the local number in the off-diagonal part of the local
77    storage of the matrix.  This is done in a non-scalable way since the
78    length of colmap equals the global matrix length.
79 */
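/*
   The SBAIJ version below is not yet written; as a rough, assumed outline (modeled
   on the analogous MPIBAIJ routine, so the exact field names may differ), colmap
   would be built along these lines:

     Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
     Mat_SeqBAIJ  *B    = (Mat_SeqBAIJ*)baij->B->data;
     int          i,ierr;

     ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
     ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
     for (i=0; i<B->nbs; i++) baij->colmap[baij->garray[i]] = i*baij->bs + 1;

   i.e. colmap[] is indexed by global block column and stores one plus the starting
   local column of that block in B, with 0 meaning the block column is not present.
*/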
80 #undef __FUNCT__
81 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private"
82 static int CreateColmap_MPISBAIJ_Private(Mat mat)
83 {
84   PetscFunctionBegin;
85   SETERRQ(1,"Function not yet written for SBAIJ format");
86   /* PetscFunctionReturn(0); */
87 }
88 
89 #define CHUNKSIZE  10
90 
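/*
   MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv): insert or add one scalar
   entry into the local diagonal block "a".  The target block row is narrowed by a
   short bisection, then scanned linearly for the block column.  If the block is not
   present and new nonzeros are allowed, the row is opened up (reallocating the
   i/j/a arrays in CHUNKSIZE-block increments when full), the new block is zeroed,
   and the value is stored.  The _B_Private variant below does the same for the
   off-diagonal block "b".
*/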
91 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
92 { \
93  \
94     brow = row/bs;  \
95     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
96     rmax = aimax[brow]; nrow = ailen[brow]; \
97       bcol = col/bs; \
98       ridx = row % bs; cidx = col % bs; \
99       low = 0; high = nrow; \
100       while (high-low > 3) { \
101         t = (low+high)/2; \
102         if (rp[t] > bcol) high = t; \
103         else              low  = t; \
104       } \
105       for (_i=low; _i<high; _i++) { \
106         if (rp[_i] > bcol) break; \
107         if (rp[_i] == bcol) { \
108           bap  = ap +  bs2*_i + bs*cidx + ridx; \
109           if (addv == ADD_VALUES) *bap += value;  \
110           else                    *bap  = value;  \
111           goto a_noinsert; \
112         } \
113       } \
114       if (a->nonew == 1) goto a_noinsert; \
115       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
116       if (nrow >= rmax) { \
117         /* there is no extra room in row, therefore enlarge */ \
118         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
119         MatScalar *new_a; \
120  \
121         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
122  \
123         /* malloc new storage space */ \
124         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
125         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
126         new_j = (int*)(new_a + bs2*new_nz); \
127         new_i = new_j + new_nz; \
128  \
129         /* copy over old data into new slots */ \
130         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
131         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
132         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
133         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
134         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
135         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
136         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
137         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
138                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
139         /* free up old matrix storage */ \
140         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
141         if (!a->singlemalloc) { \
142           ierr = PetscFree(a->i);CHKERRQ(ierr); \
143           ierr = PetscFree(a->j);CHKERRQ(ierr);\
144         } \
145         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
146         a->singlemalloc = PETSC_TRUE; \
147  \
148         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
149         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
150         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
151         a->s_maxnz += bs2*CHUNKSIZE; \
152         a->reallocs++; \
153         a->s_nz++; \
154       } \
155       N = nrow++ - 1;  \
156       /* shift up all the later entries in this row */ \
157       for (ii=N; ii>=_i; ii--) { \
158         rp[ii+1] = rp[ii]; \
159         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
160       } \
161       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
162       rp[_i]                      = bcol;  \
163       ap[bs2*_i + bs*cidx + ridx] = value;  \
164       a_noinsert:; \
165     ailen[brow] = nrow; \
166 }
167 #ifndef MatSetValues_SeqSBAIJ_B_Private
168 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
169 { \
170     brow = row/bs;  \
171     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
172     rmax = bimax[brow]; nrow = bilen[brow]; \
173       bcol = col/bs; \
174       ridx = row % bs; cidx = col % bs; \
175       low = 0; high = nrow; \
176       while (high-low > 3) { \
177         t = (low+high)/2; \
178         if (rp[t] > bcol) high = t; \
179         else              low  = t; \
180       } \
181       for (_i=low; _i<high; _i++) { \
182         if (rp[_i] > bcol) break; \
183         if (rp[_i] == bcol) { \
184           bap  = ap +  bs2*_i + bs*cidx + ridx; \
185           if (addv == ADD_VALUES) *bap += value;  \
186           else                    *bap  = value;  \
187           goto b_noinsert; \
188         } \
189       } \
190       if (b->nonew == 1) goto b_noinsert; \
191       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
192       if (nrow >= rmax) { \
193         /* there is no extra room in row, therefore enlarge */ \
194         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
195         MatScalar *new_a; \
196  \
197         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
198  \
199         /* malloc new storage space */ \
200         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
201         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
202         new_j = (int*)(new_a + bs2*new_nz); \
203         new_i = new_j + new_nz; \
204  \
205         /* copy over old data into new slots */ \
206         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
207         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
208         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
209         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
210         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
211         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
212         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
213         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
214                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
215         /* free up old matrix storage */ \
216         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
217         if (!b->singlemalloc) { \
218           ierr = PetscFree(b->i);CHKERRQ(ierr); \
219           ierr = PetscFree(b->j);CHKERRQ(ierr); \
220         } \
221         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
222         b->singlemalloc = PETSC_TRUE; \
223  \
224         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
225         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
226         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
227         b->maxnz += bs2*CHUNKSIZE; \
228         b->reallocs++; \
229         b->nz++; \
230       } \
231       N = nrow++ - 1;  \
232       /* shift up all the later entries in this row */ \
233       for (ii=N; ii>=_i; ii--) { \
234         rp[ii+1] = rp[ii]; \
235         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
236       } \
237       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
238       rp[_i]                      = bcol;  \
239       ap[bs2*_i + bs*cidx + ridx] = value;  \
240       b_noinsert:; \
241     bilen[brow] = nrow; \
242 }
243 #endif
244 
245 #if defined(PETSC_USE_MAT_SINGLE)
246 #undef __FUNCT__
247 #define __FUNCT__ "MatSetValues_MPISBAIJ"
248 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
249 {
250   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
251   int          ierr,i,N = m*n;
252   MatScalar    *vsingle;
253 
254   PetscFunctionBegin;
255   if (N > b->setvalueslen) {
256     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
257     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
258     b->setvalueslen  = N;
259   }
260   vsingle = b->setvaluescopy;
261 
262   for (i=0; i<N; i++) {
263     vsingle[i] = v[i];
264   }
265   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
266   PetscFunctionReturn(0);
267 }
268 
269 #undef __FUNCT__
270 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
271 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
272 {
273   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
274   int         ierr,i,N = m*n*b->bs2;
275   MatScalar   *vsingle;
276 
277   PetscFunctionBegin;
278   if (N > b->setvalueslen) {
279     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
280     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
281     b->setvalueslen  = N;
282   }
283   vsingle = b->setvaluescopy;
284   for (i=0; i<N; i++) {
285     vsingle[i] = v[i];
286   }
287   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
288   PetscFunctionReturn(0);
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
293 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
294 {
295   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
296   int         ierr,i,N = m*n;
297   MatScalar   *vsingle;
298 
299   PetscFunctionBegin;
300   SETERRQ(1,"Function not yet written for SBAIJ format");
301   /* PetscFunctionReturn(0); */
302 }
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
306 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
307 {
308   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
309   int         ierr,i,N = m*n*b->bs2;
310   MatScalar   *vsingle;
311 
312   PetscFunctionBegin;
313   SETERRQ(1,"Function not yet written for SBAIJ format");
314   /* PetscFunctionReturn(0); */
315 }
316 #endif
317 
318 /* Only add/insert a(i,j) with i<=j (blocks).
319    Any a(i,j) with i>j input by the user is ignored.
320 */
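/*
   Illustrative example (not taken from this file): with bs = 1, a call such as

     int         row = 3, col = 1;
     PetscScalar v   = 5.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);

   targets the lower triangle (row > col) and is therefore dropped by this routine;
   only the corresponding upper triangular entry (1,3) needs to be set by the caller.
*/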
321 #undef __FUNCT__
322 #define __FUNCT__ "MatSetValues_MPISBAIJ"
323 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
324 {
325   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
326   MatScalar    value;
327   PetscTruth   roworiented = baij->roworiented;
328   int          ierr,i,j,row,col;
329   int          rstart_orig=baij->rstart_bs;
330   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
331   int          cend_orig=baij->cend_bs,bs=baij->bs;
332 
333   /* Some Variables required in the macro */
334   Mat          A = baij->A;
335   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
336   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
337   MatScalar    *aa=a->a;
338 
339   Mat          B = baij->B;
340   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
341   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
342   MatScalar    *ba=b->a;
343 
344   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
345   int          low,high,t,ridx,cidx,bs2=a->bs2;
346   MatScalar    *ap,*bap;
347 
348   /* for stash */
349   int          n_loc, *in_loc=0;
350   MatScalar    *v_loc=0;
351 
352   PetscFunctionBegin;
353 
354   if(!baij->donotstash){
355     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
356     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
357   }
358 
359   for (i=0; i<m; i++) {
360     if (im[i] < 0) continue;
361 #if defined(PETSC_USE_BOPT_g)
362     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
363 #endif
364     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
365       row = im[i] - rstart_orig;              /* local row index */
366       for (j=0; j<n; j++) {
367         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
368         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
369           col = in[j] - cstart_orig;          /* local col index */
370           brow = row/bs; bcol = col/bs;
371           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
372           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
373           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
374           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
375         } else if (in[j] < 0) continue;
376 #if defined(PETSC_USE_BOPT_g)
377         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");}
378 #endif
379         else {  /* off-diag entry (B) */
380           if (mat->was_assembled) {
381             if (!baij->colmap) {
382               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
383             }
384 #if defined (PETSC_USE_CTABLE)
385             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
386             col  = col - 1 + in[j]%bs;
387 #else
388             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
389 #endif
390             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
391               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
392               col =  in[j];
393               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
394               B = baij->B;
395               b = (Mat_SeqBAIJ*)(B)->data;
396               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
397               ba=b->a;
398             }
399           } else col = in[j];
400           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
401           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
402           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
403         }
404       }
405     } else {  /* off processor entry */
406       if (!baij->donotstash) {
407         n_loc = 0;
408         for (j=0; j<n; j++){
409           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
410           in_loc[n_loc] = in[j];
411           if (roworiented) {
412             v_loc[n_loc] = v[i*n+j];
413           } else {
414             v_loc[n_loc] = v[j*m+i];
415           }
416           n_loc++;
417         }
418         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
419       }
420     }
421   }
422 
423   if(!baij->donotstash){
424     ierr = PetscFree(in_loc);CHKERRQ(ierr);
425     ierr = PetscFree(v_loc);CHKERRQ(ierr);
426   }
427   PetscFunctionReturn(0);
428 }
429 
430 #undef __FUNCT__
431 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
432 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
433 {
434   PetscFunctionBegin;
435   SETERRQ(1,"Function not yet written for SBAIJ format");
436   /* PetscFunctionReturn(0); */
437 }
438 
439 #define HASH_KEY 0.6180339887
440 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
441 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
442 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
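/*
   HASH maps a key into [0,size) by multiplicative hashing: take the fractional part
   of key*HASH_KEY (the golden-ratio conjugate) and scale it by the table size.  For
   example, HASH(97,1000,tmp) computes tmp = 618.0339887, keeps the fractional part
   0.0339887, and 97*0.0339887 = 3.29..., so the slot is 3.
*/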
443 #undef __FUNCT__
444 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
445 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
446 {
447   PetscFunctionBegin;
448   SETERRQ(1,"Function not yet written for SBAIJ format");
449   /* PetscFunctionReturn(0); */
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
454 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
455 {
456   PetscFunctionBegin;
457   SETERRQ(1,"Function not yet written for SBAIJ format");
458   /* PetscFunctionReturn(0); */
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatGetValues_MPISBAIJ"
463 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
464 {
465   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
466   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
467   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
468 
469   PetscFunctionBegin;
470   for (i=0; i<m; i++) {
471     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
472     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
473     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
474       row = idxm[i] - bsrstart;
475       for (j=0; j<n; j++) {
476         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
477         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
478         if (idxn[j] >= bscstart && idxn[j] < bscend){
479           col = idxn[j] - bscstart;
480           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
481         } else {
482           if (!baij->colmap) {
483             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
484           }
485 #if defined (PETSC_USE_CTABLE)
486           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
487           data --;
488 #else
489           data = baij->colmap[idxn[j]/bs]-1;
490 #endif
491           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
492           else {
493             col  = data + idxn[j]%bs;
494             ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
495           }
496         }
497       }
498     } else {
499       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
500     }
501   }
502   PetscFunctionReturn(0);
503 }
504 
505 #undef __FUNCT__
506 #define __FUNCT__ "MatNorm_MPISBAIJ"
507 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
508 {
509   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
510   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
511   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
512   int        ierr;
513   PetscReal  sum[2],*lnorm2;
514 
515   PetscFunctionBegin;
516   if (baij->size == 1) {
517     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
518   } else {
519     if (type == NORM_FROBENIUS) {
520       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
521       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
522       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
523       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
524       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
525       /*
526       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
527       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
528       */
529       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
530       /*
531       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
532       PetscSynchronizedFlush(PETSC_COMM_WORLD); */
533 
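      /* B stores only the upper off-diagonal blocks; their symmetric counterparts are
         implicit, so B's contribution to the Frobenius norm is counted twice */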
534       *norm = sqrt(sum[0] + 2*sum[1]);
535       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
536     } else {
537       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
538     }
539   }
540   PetscFunctionReturn(0);
541 }
542 
543 /*
544   Creates the hash table and fills it in.
545   This table is created only once.
546   If new entries need to be added to the matrix
547   then the hash table has to be destroyed and
548   recreated.
549 */
550 #undef __FUNCT__
551 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
552 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
553 {
554   PetscFunctionBegin;
555   SETERRQ(1,"Function not yet written for SBAIJ format");
556   /* PetscFunctionReturn(0); */
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
561 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
562 {
563   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
564   int         ierr,nstash,reallocs;
565   InsertMode  addv;
566 
567   PetscFunctionBegin;
568   if (baij->donotstash) {
569     PetscFunctionReturn(0);
570   }
571 
572   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
573   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
574   if (addv == (ADD_VALUES|INSERT_VALUES)) {
575     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
576   }
577   mat->insertmode = addv; /* in case this processor had no cache */
578 
579   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
580   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
581   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
582   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
583   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
584   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
590 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
591 {
592   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
593   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
594   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
595   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
596   int         *row,*col,other_disassembled;
597   PetscTruth  r1,r2,r3;
598   MatScalar   *val;
599   InsertMode  addv = mat->insertmode;
600   /* int         rank;*/
601 
602   PetscFunctionBegin;
603   /* remove 2 line below later */
604   /*ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); */
605 
606   if (!baij->donotstash) {
607     while (1) {
608       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
609       /*
610       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
611       PetscSynchronizedFlush(PETSC_COMM_WORLD);
612       */
613       if (!flg) break;
614 
615       for (i=0; i<n;) {
616         /* Now identify the consecutive vals belonging to the same row */
617         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
618         if (j < n) ncols = j-i;
619         else       ncols = n-i;
620         /* Now assemble all these values with a single function call */
621         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
622         i = j;
623       }
624     }
625     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
626     /* Now process the block-stash. Since the values are stashed column-oriented,
627        temporarily switch the row-oriented flags to column-oriented, and restore
628        the original flags after MatSetValues() */
629     r1 = baij->roworiented;
630     r2 = a->roworiented;
631     r3 = b->roworiented;
632     baij->roworiented = PETSC_FALSE;
633     a->roworiented    = PETSC_FALSE;
634     b->roworiented    = PETSC_FALSE;
635     while (1) {
636       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
637       if (!flg) break;
638 
639       for (i=0; i<n;) {
640         /* Now identify the consecutive vals belonging to the same row */
641         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
642         if (j < n) ncols = j-i;
643         else       ncols = n-i;
644         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
645         i = j;
646       }
647     }
648     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
649     baij->roworiented = r1;
650     a->roworiented    = r2;
651     b->roworiented    = r3;
652   }
653 
654   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
655   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
656 
657   /* determine if any processor has disassembled, if so we must
658      also disassemble ourselves, in order that we may reassemble. */
659   /*
660      if nonzero structure of submatrix B cannot change then we know that
661      no processor disassembled thus we can skip this stuff
662   */
663   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
664     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
665     if (mat->was_assembled && !other_disassembled) {
666       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
667     }
668   }
669 
670   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
671     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr);
672   }
673   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
674   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
675 
676 #if defined(PETSC_USE_BOPT_g)
677   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
678     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
679     baij->ht_total_ct  = 0;
680     baij->ht_insert_ct = 0;
681   }
682 #endif
683   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
684     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
685     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
686     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
687   }
688 
689   if (baij->rowvalues) {
690     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
691     baij->rowvalues = 0;
692   }
693   PetscFunctionReturn(0);
694 }
695 
696 #undef __FUNCT__
697 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
698 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
699 {
700   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
701   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
702   PetscTruth        isascii,isdraw;
703   PetscViewer       sviewer;
704   PetscViewerFormat format;
705 
706   PetscFunctionBegin;
707   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
708   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
709   if (isascii) {
710     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
711     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
712       MatInfo info;
713       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
714       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
715       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
716               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
717               baij->bs,(int)info.memory);CHKERRQ(ierr);
718       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
719       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
720       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
721       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
722       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
723       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
724       PetscFunctionReturn(0);
725     } else if (format == PETSC_VIEWER_ASCII_INFO) {
726       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
727       PetscFunctionReturn(0);
728     }
729   }
730 
731   if (isdraw) {
732     PetscDraw       draw;
733     PetscTruth isnull;
734     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
735     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
736   }
737 
738   if (size == 1) {
739     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
740     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
741   } else {
742     /* assemble the entire matrix onto first processor. */
743     Mat         A;
744     Mat_SeqSBAIJ *Aloc;
745     Mat_SeqBAIJ *Bloc;
746     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
747     MatScalar   *a;
748 
749     if (!rank) {
750       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
751     } else {
752       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
753     }
754     PetscLogObjectParent(mat,A);
755 
756     /* copy over the A part */
757     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
758     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
759     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
760 
761     for (i=0; i<mbs; i++) {
762       rvals[0] = bs*(baij->rstart + i);
763       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
764       for (j=ai[i]; j<ai[i+1]; j++) {
765         col = (baij->cstart+aj[j])*bs;
766         for (k=0; k<bs; k++) {
767           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
768           col++; a += bs;
769         }
770       }
771     }
772     /* copy over the B part */
773     Bloc = (Mat_SeqBAIJ*)baij->B->data;
774     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
775     for (i=0; i<mbs; i++) {
776       rvals[0] = bs*(baij->rstart + i);
777       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
778       for (j=ai[i]; j<ai[i+1]; j++) {
779         col = baij->garray[aj[j]]*bs;
780         for (k=0; k<bs; k++) {
781           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
782           col++; a += bs;
783         }
784       }
785     }
786     ierr = PetscFree(rvals);CHKERRQ(ierr);
787     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
788     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
789     /*
790        Everyone has to make this call to draw the matrix since the graphics waits are
791        synchronized across all processors that share the PetscDraw object
792     */
793     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
794     if (!rank) {
795       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
796       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
797     }
798     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
799     ierr = MatDestroy(A);CHKERRQ(ierr);
800   }
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatView_MPISBAIJ"
806 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
807 {
808   int        ierr;
809   PetscTruth isascii,isdraw,issocket,isbinary;
810 
811   PetscFunctionBegin;
812   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
813   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
814   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
815   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
816   if (isascii || isdraw || issocket || isbinary) {
817     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
818   } else {
819     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "MatDestroy_MPISBAIJ"
826 int MatDestroy_MPISBAIJ(Mat mat)
827 {
828   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
829   int         ierr;
830 
831   PetscFunctionBegin;
832 #if defined(PETSC_USE_LOG)
833   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
834 #endif
835   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
836   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
837   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
838   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
839   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
840 #if defined (PETSC_USE_CTABLE)
841   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
842 #else
843   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
844 #endif
845   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
846   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
847   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
848   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
849   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
850   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
851 #if defined(PETSC_USE_MAT_SINGLE)
852   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
853 #endif
854   ierr = PetscFree(baij);CHKERRQ(ierr);
855   PetscFunctionReturn(0);
856 }
857 
858 #undef __FUNCT__
859 #define __FUNCT__ "MatMult_MPISBAIJ"
860 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
861 {
862   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
863   int         ierr,nt;
864 
865   PetscFunctionBegin;
866   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
867   if (nt != A->n) {
868     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
869   }
870   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
871   if (nt != A->m) {
872     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
873   }
874 
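  /* With only the upper triangle stored, the product is assembled from three
     pieces (a summary of the calls below):
        y  = A_d * x_local                    (diagonal block)
        y += B * x_ghost                      (superdiagonal block, forward scatter)
        y += contributions of B^T * x_local   (subdiagonal block, added back to the
                                               owning processes by the reverse scatter) */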
875   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
876   /* do diagonal part */
877   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
878   /* do superdiagonal part */
879   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
880   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
881   /* do subdiagonal part */
882   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
883   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
884   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
885 
886   PetscFunctionReturn(0);
887 }
888 
889 #undef __FUNCT__
890 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
891 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
892 {
893   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
894   int        ierr;
895 
896   PetscFunctionBegin;
897   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
898   /* do diagonal part */
899   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
900   /* do superdiagonal part */
901   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
902   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
903 
904   /* do subdiagonal part */
905   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
906   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
907   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
908 
909   PetscFunctionReturn(0);
910 }
911 
912 #undef __FUNCT__
913 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
914 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
915 {
916   PetscFunctionBegin;
917   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
918   /* PetscFunctionReturn(0); */
919 }
920 
921 #undef __FUNCT__
922 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
923 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
924 {
925   PetscFunctionBegin;
926   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
927   /* PetscFunctionReturn(0); */
928 }
929 
930 /*
931   This only works correctly for square matrices where the subblock A->A is the
932    diagonal block
933 */
934 #undef __FUNCT__
935 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
936 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
937 {
938   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
939   int         ierr;
940 
941   PetscFunctionBegin;
942   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
943   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatScale_MPISBAIJ"
949 int MatScale_MPISBAIJ(PetscScalar *aa,Mat A)
950 {
951   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
952   int         ierr;
953 
954   PetscFunctionBegin;
955   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
956   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
957   PetscFunctionReturn(0);
958 }
959 
960 #undef __FUNCT__
961 #define __FUNCT__ "MatGetRow_MPISBAIJ"
962 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
963 {
964   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
965   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
966   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
967   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
968   int            *cmap,*idx_p,cstart = mat->cstart;
969 
970   PetscFunctionBegin;
971   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
972   mat->getrowactive = PETSC_TRUE;
973 
974   if (!mat->rowvalues && (idx || v)) {
975     /*
976         allocate enough space to hold information from the longest row.
977     */
978     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
979     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
980     int     max = 1,mbs = mat->mbs,tmp;
981     for (i=0; i<mbs; i++) {
982       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
983       if (max < tmp) { max = tmp; }
984     }
985     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
986     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
987   }
988 
989   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
990   lrow = row - brstart;  /* local row index */
991 
992   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
993   if (!v)   {pvA = 0; pvB = 0;}
994   if (!idx) {pcA = 0; if (!v) pcB = 0;}
995   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
996   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
997   nztot = nzA + nzB;
998 
999   cmap  = mat->garray;
1000   if (v  || idx) {
1001     if (nztot) {
1002       /* Sort by increasing column numbers, assuming A and B already sorted */
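      /* The columns of B split into those left of the diagonal block (global block
         column < cstart) and those to its right; imark records the split point */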
1003       int imark = -1;
1004       if (v) {
1005         *v = v_p = mat->rowvalues;
1006         for (i=0; i<nzB; i++) {
1007           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1008           else break;
1009         }
1010         imark = i;
1011         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1012         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1013       }
1014       if (idx) {
1015         *idx = idx_p = mat->rowindices;
1016         if (imark > -1) {
1017           for (i=0; i<imark; i++) {
1018             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1019           }
1020         } else {
1021           for (i=0; i<nzB; i++) {
1022             if (cmap[cworkB[i]/bs] < cstart)
1023               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1024             else break;
1025           }
1026           imark = i;
1027         }
1028         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1029         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1030       }
1031     } else {
1032       if (idx) *idx = 0;
1033       if (v)   *v   = 0;
1034     }
1035   }
1036   *nz = nztot;
1037   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1038   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1044 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1045 {
1046   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1047 
1048   PetscFunctionBegin;
1049   if (baij->getrowactive == PETSC_FALSE) {
1050     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1051   }
1052   baij->getrowactive = PETSC_FALSE;
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 #undef __FUNCT__
1057 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1058 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1059 {
1060   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1061 
1062   PetscFunctionBegin;
1063   *bs = baij->bs;
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 #undef __FUNCT__
1068 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1069 int MatZeroEntries_MPISBAIJ(Mat A)
1070 {
1071   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1072   int         ierr;
1073 
1074   PetscFunctionBegin;
1075   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1076   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1082 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1083 {
1084   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1085   Mat         A = a->A,B = a->B;
1086   int         ierr;
1087   PetscReal   isend[5],irecv[5];
1088 
1089   PetscFunctionBegin;
1090   info->block_size     = (PetscReal)a->bs;
1091   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1092   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1093   isend[3] = info->memory;  isend[4] = info->mallocs;
1094   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1095   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1096   isend[3] += info->memory;  isend[4] += info->mallocs;
1097   if (flag == MAT_LOCAL) {
1098     info->nz_used      = isend[0];
1099     info->nz_allocated = isend[1];
1100     info->nz_unneeded  = isend[2];
1101     info->memory       = isend[3];
1102     info->mallocs      = isend[4];
1103   } else if (flag == MAT_GLOBAL_MAX) {
1104     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1105     info->nz_used      = irecv[0];
1106     info->nz_allocated = irecv[1];
1107     info->nz_unneeded  = irecv[2];
1108     info->memory       = irecv[3];
1109     info->mallocs      = irecv[4];
1110   } else if (flag == MAT_GLOBAL_SUM) {
1111     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1112     info->nz_used      = irecv[0];
1113     info->nz_allocated = irecv[1];
1114     info->nz_unneeded  = irecv[2];
1115     info->memory       = irecv[3];
1116     info->mallocs      = irecv[4];
1117   } else {
1118     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1119   }
1120   info->rows_global       = (PetscReal)A->M;
1121   info->columns_global    = (PetscReal)A->N;
1122   info->rows_local        = (PetscReal)A->m;
1123   info->columns_local     = (PetscReal)A->N;
1124   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1125   info->fill_ratio_needed = 0;
1126   info->factor_mallocs    = 0;
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1132 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1133 {
1134   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1135   int         ierr;
1136 
1137   PetscFunctionBegin;
1138   switch (op) {
1139   case MAT_NO_NEW_NONZERO_LOCATIONS:
1140   case MAT_YES_NEW_NONZERO_LOCATIONS:
1141   case MAT_COLUMNS_UNSORTED:
1142   case MAT_COLUMNS_SORTED:
1143   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1144   case MAT_KEEP_ZEROED_ROWS:
1145   case MAT_NEW_NONZERO_LOCATION_ERR:
1146     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1147     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1148     break;
1149   case MAT_ROW_ORIENTED:
1150     a->roworiented = PETSC_TRUE;
1151     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1152     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1153     break;
1154   case MAT_ROWS_SORTED:
1155   case MAT_ROWS_UNSORTED:
1156   case MAT_YES_NEW_DIAGONALS:
1157   case MAT_USE_SINGLE_PRECISION_SOLVES:
1158     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1159     break;
1160   case MAT_COLUMN_ORIENTED:
1161     a->roworiented = PETSC_FALSE;
1162     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1163     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1164     break;
1165   case MAT_IGNORE_OFF_PROC_ENTRIES:
1166     a->donotstash = PETSC_TRUE;
1167     break;
1168   case MAT_NO_NEW_DIAGONALS:
1169     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1170     break;
1171   case MAT_USE_HASH_TABLE:
1172     a->ht_flag = PETSC_TRUE;
1173     break;
1174   default:
1175     SETERRQ(PETSC_ERR_SUP,"unknown option");
1176     break;
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1183 int MatTranspose_MPISBAIJ(Mat A,Mat *matout)
1184 {
1185   PetscFunctionBegin;
1186   SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called");
1187   /* PetscFunctionReturn(0); */
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1192 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1193 {
1194   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1195   Mat         a = baij->A,b = baij->B;
1196   int         ierr,s1,s2,s3;
1197 
1198   PetscFunctionBegin;
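  /* Symmetric scaling mat <- diag(ll) * mat * diag(rr), with ll == rr required so the
     result stays symmetric: the diagonal block is scaled on both sides, while B is
     scaled by ll on the left and by the scattered ghost values of rr on the right. */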
1199   if (ll != rr) {
1200     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1201   }
1202   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1203   if (rr) {
1204     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1205     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1206     /* Overlap communication with computation. */
1207     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1208     /*} if (ll) { */
1209     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1210     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1211     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1212     /* } */
1213   /* scale  the diagonal block */
1214   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1215 
1216   /* if (rr) { */
1217     /* Do a scatter end and then right scale the off-diagonal block */
1218     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1219     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1220   }
1221 
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 #undef __FUNCT__
1226 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1227 int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag)
1228 {
1229   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1230   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1231   int            *procs,*nprocs,j,idx,nsends,*work,row;
1232   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1233   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1234   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1235   MPI_Comm       comm = A->comm;
1236   MPI_Request    *send_waits,*recv_waits;
1237   MPI_Status     recv_status,*send_status;
1238   IS             istmp;
1239   PetscTruth     found;
1240 
1241   PetscFunctionBegin;
1242   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1243   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1244 
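  /* Each process owns a contiguous range of rows; the global list of rows to zero is
     first routed to the owning processes with a round of point-to-point messages, and
     the received rows are then zeroed locally. */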
1245   /*  first count number of contributors to each processor */
1246   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
1247   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1248   procs = nprocs + size;
1249   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
1250   for (i=0; i<N; i++) {
1251     idx   = rows[i];
1252     found = PETSC_FALSE;
1253     for (j=0; j<size; j++) {
1254       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1255         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
1256       }
1257     }
1258     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1259   }
1260   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1261 
1262   /* inform other processors of number of messages and max length*/
1263   ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
1264   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1265   nmax   = work[rank];
1266   nrecvs = work[size+rank];
1267   ierr   = PetscFree(work);CHKERRQ(ierr);
1268 
1269   /* post receives:   */
1270   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
1271   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1272   for (i=0; i<nrecvs; i++) {
1273     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1274   }
1275 
1276   /* do sends:
1277      1) starts[i] gives the starting index in svalues for stuff going to
1278      the ith processor
1279   */
1280   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
1281   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1282   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
1283   starts[0]  = 0;
1284   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1285   for (i=0; i<N; i++) {
1286     svalues[starts[owner[i]]++] = rows[i];
1287   }
1288   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1289 
1290   starts[0] = 0;
1291   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1292   count = 0;
1293   for (i=0; i<size; i++) {
1294     if (procs[i]) {
1295       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1296     }
1297   }
1298   ierr = PetscFree(starts);CHKERRQ(ierr);
1299 
1300   base = owners[rank]*bs;
1301 
1302   /*  wait on receives */
1303   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
1304   source = lens + nrecvs;
1305   count  = nrecvs; slen = 0;
1306   while (count) {
1307     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1308     /* unpack receives into our local space */
1309     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1310     source[imdex]  = recv_status.MPI_SOURCE;
1311     lens[imdex]    = n;
1312     slen          += n;
1313     count--;
1314   }
1315   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1316 
1317   /* move the data into the send scatter */
1318   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
1319   count = 0;
1320   for (i=0; i<nrecvs; i++) {
1321     values = rvalues + i*nmax;
1322     for (j=0; j<lens[i]; j++) {
1323       lrows[count++] = values[j] - base;
1324     }
1325   }
1326   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1327   ierr = PetscFree(lens);CHKERRQ(ierr);
1328   ierr = PetscFree(owner);CHKERRQ(ierr);
1329   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1330 
1331   /* actually zap the local rows */
1332   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1333   PetscLogObjectParent(A,istmp);
1334 
1335   /*
1336         Zero the required rows. If the "diagonal block" of the matrix
1337      is square and the user wishes to set the diagonal we use separate
1338      code so that MatSetValues() is not called for each diagonal entry, allocating
1339      new memory, thus calling lots of mallocs and slowing things down.
1340 
1341        Contributed by: Mathew Knepley
1342   */
1343   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1344   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1345   if (diag && (l->A->M == l->A->N)) {
1346     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1347   } else if (diag) {
1348     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1349     if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
1350       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1351 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1352     }
1353     for (i=0; i<slen; i++) {
1354       row  = lrows[i] + rstart_bs;
1355       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1356     }
1357     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1358     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1359   } else {
1360     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1361   }
1362 
1363   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1364   ierr = PetscFree(lrows);CHKERRQ(ierr);
1365 
1366   /* wait on sends */
1367   if (nsends) {
1368     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1369     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1370     ierr = PetscFree(send_status);CHKERRQ(ierr);
1371   }
1372   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1373   ierr = PetscFree(svalues);CHKERRQ(ierr);
1374 
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 #undef __FUNCT__
1379 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1380 int MatPrintHelp_MPISBAIJ(Mat A)
1381 {
1382   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1383   MPI_Comm    comm = A->comm;
1384   static int  called = 0;
1385   int         ierr;
1386 
1387   PetscFunctionBegin;
1388   if (!a->rank) {
1389     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1390   }
1391   if (called) {PetscFunctionReturn(0);} else called = 1;
1392   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1393   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 #undef __FUNCT__
1398 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1399 int MatSetUnfactored_MPISBAIJ(Mat A)
1400 {
1401   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1402   int         ierr;
1403 
1404   PetscFunctionBegin;
1405   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatEqual_MPISBAIJ"
1413 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1414 {
1415   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1416   Mat         a,b,c,d;
1417   PetscTruth  flg;
1418   int         ierr;
1419 
1420   PetscFunctionBegin;
1421   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr);
1422   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1423   a = matA->A; b = matA->B;
1424   c = matB->A; d = matB->B;
1425 
1426   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1427   if (flg == PETSC_TRUE) {
1428     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1429   }
1430   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1436 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1437 {
1438   int        ierr;
1439 
1440   PetscFunctionBegin;
1441   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1442   PetscFunctionReturn(0);
1443 }
1444 /* -------------------------------------------------------------------*/
1445 static struct _MatOps MatOps_Values = {
1446   MatSetValues_MPISBAIJ,
1447   MatGetRow_MPISBAIJ,
1448   MatRestoreRow_MPISBAIJ,
1449   MatMult_MPISBAIJ,
1450   MatMultAdd_MPISBAIJ,
1451   MatMultTranspose_MPISBAIJ,
1452   MatMultTransposeAdd_MPISBAIJ,
1453   0,
1454   0,
1455   0,
1456   0,
1457   0,
1458   0,
1459   MatRelax_MPISBAIJ,
1460   MatTranspose_MPISBAIJ,
1461   MatGetInfo_MPISBAIJ,
1462   MatEqual_MPISBAIJ,
1463   MatGetDiagonal_MPISBAIJ,
1464   MatDiagonalScale_MPISBAIJ,
1465   MatNorm_MPISBAIJ,
1466   MatAssemblyBegin_MPISBAIJ,
1467   MatAssemblyEnd_MPISBAIJ,
1468   0,
1469   MatSetOption_MPISBAIJ,
1470   MatZeroEntries_MPISBAIJ,
1471   MatZeroRows_MPISBAIJ,
1472   0,
1473   0,
1474   0,
1475   0,
1476   MatSetUpPreallocation_MPISBAIJ,
1477   0,
1478   0,
1479   0,
1480   0,
1481   MatDuplicate_MPISBAIJ,
1482   0,
1483   0,
1484   0,
1485   0,
1486   0,
1487   MatGetSubMatrices_MPISBAIJ,
1488   MatIncreaseOverlap_MPISBAIJ,
1489   MatGetValues_MPISBAIJ,
1490   0,
1491   MatPrintHelp_MPISBAIJ,
1492   MatScale_MPISBAIJ,
1493   0,
1494   0,
1495   0,
1496   MatGetBlockSize_MPISBAIJ,
1497   0,
1498   0,
1499   0,
1500   0,
1501   0,
1502   0,
1503   MatSetUnfactored_MPISBAIJ,
1504   0,
1505   MatSetValuesBlocked_MPISBAIJ,
1506   0,
1507   0,
1508   0,
1509   MatGetPetscMaps_Petsc,
1510   0,
1511   0,
1512   0,
1513   0,
1514   0,
1515   0,
1516   MatGetRowMax_MPISBAIJ};
1517 
1518 
1519 EXTERN_C_BEGIN
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1522 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1523 {
1524   PetscFunctionBegin;
1525   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1526   *iscopy = PETSC_FALSE;
1527   PetscFunctionReturn(0);
1528 }
1529 EXTERN_C_END
1530 
1531 EXTERN_C_BEGIN
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatCreate_MPISBAIJ"
1534 int MatCreate_MPISBAIJ(Mat B)
1535 {
1536   Mat_MPISBAIJ *b;
1537   int          ierr;
1538   PetscTruth   flg;
1539 
1540   PetscFunctionBegin;
1541 
1542   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1543   B->data = (void*)b;
1544   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1545   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1546 
1547   B->ops->destroy    = MatDestroy_MPISBAIJ;
1548   B->ops->view       = MatView_MPISBAIJ;
1549   B->mapping    = 0;
1550   B->factor     = 0;
1551   B->assembled  = PETSC_FALSE;
1552 
1553   B->insertmode = NOT_SET_VALUES;
1554   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1555   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1556 
1557   /* build local table of row and column ownerships */
1558   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1559   b->cowners    = b->rowners + b->size + 2;
1560   b->rowners_bs = b->cowners + b->size + 2;
1561   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1562 
1563   /* build cache for off array entries formed */
1564   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1565   b->donotstash  = PETSC_FALSE;
1566   b->colmap      = PETSC_NULL;
1567   b->garray      = PETSC_NULL;
1568   b->roworiented = PETSC_TRUE;
1569 
1570 #if defined(PETSC_USE_MAT_SINGLE)
1571   /* stuff for MatSetValues_XXX in single precision */
1572   b->setvalueslen     = 0;
1573   b->setvaluescopy    = PETSC_NULL;
1574 #endif
1575 
1576   /* stuff used in block assembly */
1577   b->barray       = 0;
1578 
1579   /* stuff used for matrix vector multiply */
1580   b->lvec         = 0;
1581   b->Mvctx        = 0;
1582 
1583   /* stuff for MatGetRow() */
1584   b->rowindices   = 0;
1585   b->rowvalues    = 0;
1586   b->getrowactive = PETSC_FALSE;
1587 
1588   /* hash table stuff */
1589   b->ht           = 0;
1590   b->hd           = 0;
1591   b->ht_size      = 0;
1592   b->ht_flag      = PETSC_FALSE;
1593   b->ht_fact      = 0;
1594   b->ht_total_ct  = 0;
1595   b->ht_insert_ct = 0;
1596 
1597   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1598   if (flg) {
1599     PetscReal fact = 1.39;
1600     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1601     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1602     if (fact <= 1.0) fact = 1.39;
1603     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1604     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1605   }
1606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1607                                      "MatStoreValues_MPISBAIJ",
1608                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1610                                      "MatRetrieveValues_MPISBAIJ",
1611                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1612   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1613                                      "MatGetDiagonalBlock_MPISBAIJ",
1614                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1615   PetscFunctionReturn(0);
1616 }
1617 EXTERN_C_END
1618 
1619 #undef __FUNCT__
1620 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1621 /*@C
1622    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1623    the user should preallocate the matrix storage by setting the parameters
1624    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1625    performance can be increased by more than a factor of 50.
1626 
1627    Collective on Mat
1628 
1629    Input Parameters:
1630 +  A - the matrix
1631 .  bs   - size of block
1632 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1633            submatrix  (same for all local rows)
1634 .  d_nnz - array containing the number of block nonzeros in the various block rows
1635            in the diagonal portion of the local submatrix (possibly different for each block
1636            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1637 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1638            submatrix (same for all local rows).
1639 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1640            off-diagonal portion of the local submatrix (possibly different for
1641            each block row) or PETSC_NULL.
1642 
1643 
1644    Options Database Keys:
1645 +   -mat_no_unroll - uses code that does not unroll the loops in the
1646                      block calculations (much slower)
1647 -   -mat_block_size - size of the blocks to use
1648 
1649    Notes:
1650 
1651    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1652    then it must be used on all processors that share the object for that argument.
1653 
1654    Storage Information:
1655    For a square global matrix we define each processor's diagonal portion
1656    to be its local rows and the corresponding columns (a square submatrix);
1657    each processor's off-diagonal portion encompasses the remainder of the
1658    local matrix (a rectangular submatrix).
1659 
1660    The user can specify preallocated storage for the diagonal part of
1661    the local submatrix with either d_nz or d_nnz (not both).  Set
1662    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1663    memory allocation.  Likewise, specify preallocated storage for the
1664    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1665 
1666    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1667    the figure below we depict these three local rows and all columns (0-11).
1668 
1669 .vb
1670            0 1 2 3 4 5 6 7 8 9 10 11
1671           -------------------
1672    row 3  |  o o o d d d o o o o o o
1673    row 4  |  o o o d d d o o o o o o
1674    row 5  |  o o o d d d o o o o o o
1675           -------------------
1676 .ve
1677 
1678    Thus, any entries in the d locations are stored in the d (diagonal)
1679    submatrix, and any entries in the o locations are stored in the
1680    o (off-diagonal) submatrix.  Note that the d submatrix is stored in the
1681    MATSEQSBAIJ format and the o submatrix in the MATSEQBAIJ format for compressed row storage.
1682 
1683    Now d_nz should indicate the number of block nonzeros per block row in the d matrix,
1684    and o_nz should indicate the number of block nonzeros per block row in the o matrix.
1685    In general, for PDE problems in which most nonzeros are near the diagonal,
1686    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1687    or you will get TERRIBLE performance; see the users' manual chapter on
1688    matrices.
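
   Example usage (an illustrative sketch only; the block size and the fill
   estimates below are assumptions, not taken from any particular application;
   comm and the sizes m,n,M,N are assumed to be set as for MatCreate()):

.vb
   Mat A;
   int ierr,bs = 2;

   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   /* assume roughly 3 block nonzeros per block row in the diagonal part and
      1 block nonzero per block row in the off-diagonal part of each local submatrix */
   ierr = MatMPISBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve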
1689 
1690    Level: intermediate
1691 
1692 .keywords: matrix, block, aij, compressed row, sparse, parallel
1693 
1694 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1695 @*/
1696 
1697 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1698 {
1699   Mat_MPISBAIJ *b;
1700   int          ierr,i,mbs,Mbs;
1701   PetscTruth   flg2;
1702 
1703   PetscFunctionBegin;
1704   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr);
1705   if (!flg2) PetscFunctionReturn(0);
1706 
1707   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1708 
1709   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1710   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1711   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1712   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1713   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1714   if (d_nnz) {
1715     for (i=0; i<B->m/bs; i++) {
1716       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1717     }
1718   }
1719   if (o_nnz) {
1720     for (i=0; i<B->m/bs; i++) {
1721       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1722     }
1723   }
1724   B->preallocated = PETSC_TRUE;
1725   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1726   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1727   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1728   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1729 
1730   b   = (Mat_MPISBAIJ*)B->data;
1731   mbs = B->m/bs;
1732   Mbs = B->M/bs;
1733   if (mbs*bs != B->m) {
1734     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
1735   }
1736 
1737   b->bs  = bs;
1738   b->bs2 = bs*bs;
1739   b->mbs = mbs;
1740   b->nbs = mbs;
1741   b->Mbs = Mbs;
1742   b->Nbs = Mbs;
1743 
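  /* rowners[] gathered below becomes the prefix sum of the per-process block-row counts,
     so block rows rowners[rank]..rowners[rank+1]-1 live on this process;
     rowners_bs[] is the same table scaled by the block size */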
1744   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1745   b->rowners[0]    = 0;
1746   for (i=2; i<=b->size; i++) {
1747     b->rowners[i] += b->rowners[i-1];
1748   }
1749   b->rstart    = b->rowners[b->rank];
1750   b->rend      = b->rowners[b->rank+1];
1751   b->cstart    = b->rstart;
1752   b->cend      = b->rend;
1753   for (i=0; i<=b->size; i++) {
1754     b->rowners_bs[i] = b->rowners[i]*bs;
1755   }
1756   b->rstart_bs = b->rstart*bs;
1757   b->rend_bs   = b->rend*bs;
1758 
1759   b->cstart_bs = b->cstart*bs;
1760   b->cend_bs   = b->cend*bs;
1761 
1762 
1763   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1764   PetscLogObjectParent(B,b->A);
1765   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1766   PetscLogObjectParent(B,b->B);
1767 
1768   /* build cache for off array entries formed */
1769   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1770 
1771   PetscFunctionReturn(0);
1772 }
1773 
1774 #undef __FUNCT__
1775 #define __FUNCT__ "MatCreateMPISBAIJ"
1776 /*@C
1777    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1778    (block compressed row).  For good matrix assembly performance
1779    the user should preallocate the matrix storage by setting the parameters
1780    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1781    performance can be increased by more than a factor of 50.
1782 
1783    Collective on MPI_Comm
1784 
1785    Input Parameters:
1786 +  comm - MPI communicator
1787 .  bs   - size of block
1788 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1789            This value should be the same as the local size used in creating the
1790            y vector for the matrix-vector product y = Ax.
1791 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1792            This value should be the same as the local size used in creating the
1793            x vector for the matrix-vector product y = Ax.
1794 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1795 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1796 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1797            submatrix  (same for all local rows)
1798 .  d_nnz - array containing the number of block nonzeros in the various block rows
1799            in the diagonal portion of the local submatrix (possibly different for each block
1800            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1801 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1802            submatrix (same for all local rows).
1803 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1804            off-diagonal portion of the local submatrix (possibly different for
1805            each block row) or PETSC_NULL.
1806 
1807    Output Parameter:
1808 .  A - the matrix
1809 
1810    Options Database Keys:
1811 +   -mat_no_unroll - uses code that does not unroll the loops in the
1812                      block calculations (much slower)
1813 .   -mat_block_size - size of the blocks to use
1814 -   -mat_mpi - use the parallel matrix data structures even on one processor
1815                (defaults to using SeqSBAIJ format on one processor)
1816 
1817    Notes:
1818    The user MUST specify either the local or global matrix dimensions
1819    (possibly both).
1820 
1821    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1822    then it must be used on all processors that share the object for that argument.
1823 
1824    Storage Information:
1825    For a square global matrix we define each processor's diagonal portion
1826    to be its local rows and the corresponding columns (a square submatrix);
1827    each processor's off-diagonal portion encompasses the remainder of the
1828    local matrix (a rectangular submatrix).
1829 
1830    The user can specify preallocated storage for the diagonal part of
1831    the local submatrix with either d_nz or d_nnz (not both).  Set
1832    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1833    memory allocation.  Likewise, specify preallocated storage for the
1834    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1835 
1836    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1837    the figure below we depict these three local rows and all columns (0-11).
1838 
1839 .vb
1840            0 1 2 3 4 5 6 7 8 9 10 11
1841           -------------------
1842    row 3  |  o o o d d d o o o o o o
1843    row 4  |  o o o d d d o o o o o o
1844    row 5  |  o o o d d d o o o o o o
1845           -------------------
1846 .ve
1847 
1848    Thus, any entries in the d locations are stored in the d (diagonal)
1849    submatrix, and any entries in the o locations are stored in the
1850    o (off-diagonal) submatrix.  Note that the d submatrix is stored in the
1851    MATSEQSBAIJ format and the o submatrix in the MATSEQBAIJ format for compressed row storage.
1852 
1853    Now d_nz should indicate the number of block nonzeros per block row in the d matrix,
1854    and o_nz should indicate the number of block nonzeros per block row in the o matrix.
1855    In general, for PDE problems in which most nonzeros are near the diagonal,
1856    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1857    or you will get TERRIBLE performance; see the users' manual chapter on
1858    matrices.
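
   Example usage (an illustrative sketch; the block size, global size, and fill
   estimates below are assumptions chosen only for the example):

.vb
   Mat A;
   int ierr,M = 1000;    /* global rows = global columns; the matrix must be square */

   /* let PETSc pick the local sizes; assume about 5 block nonzeros per block row in
      the diagonal part and 2 in the off-diagonal part of each local submatrix */
   ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                            5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve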
1859 
1860    Level: intermediate
1861 
1862 .keywords: matrix, block, aij, compressed row, sparse, parallel
1863 
1864 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1865 @*/
1866 
1867 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1868 {
1869   int ierr,size;
1870 
1871   PetscFunctionBegin;
1872   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1873   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1874   if (size > 1) {
1875     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1876     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1877   } else {
1878     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1879     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1880   }
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1887 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1888 {
1889   Mat          mat;
1890   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1891   int          ierr,len=0;
1892 
1893   PetscFunctionBegin;
1894   *newmat       = 0;
1895   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1896   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
1897   mat->preallocated = PETSC_TRUE;
1898   a = (Mat_MPISBAIJ*)mat->data;
1899   a->bs  = oldmat->bs;
1900   a->bs2 = oldmat->bs2;
1901   a->mbs = oldmat->mbs;
1902   a->nbs = oldmat->nbs;
1903   a->Mbs = oldmat->Mbs;
1904   a->Nbs = oldmat->Nbs;
1905 
1906   a->rstart       = oldmat->rstart;
1907   a->rend         = oldmat->rend;
1908   a->cstart       = oldmat->cstart;
1909   a->cend         = oldmat->cend;
1910   a->size         = oldmat->size;
1911   a->rank         = oldmat->rank;
1912   a->donotstash   = oldmat->donotstash;
1913   a->roworiented  = oldmat->roworiented;
1914   a->rowindices   = 0;
1915   a->rowvalues    = 0;
1916   a->getrowactive = PETSC_FALSE;
1917   a->barray       = 0;
1918   a->rstart_bs    = oldmat->rstart_bs;
1919   a->rend_bs      = oldmat->rend_bs;
1920   a->cstart_bs    = oldmat->cstart_bs;
1921   a->cend_bs      = oldmat->cend_bs;
1922 
1923   /* hash table stuff */
1924   a->ht           = 0;
1925   a->hd           = 0;
1926   a->ht_size      = 0;
1927   a->ht_flag      = oldmat->ht_flag;
1928   a->ht_fact      = oldmat->ht_fact;
1929   a->ht_total_ct  = 0;
1930   a->ht_insert_ct = 0;
1931 
1932   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1933   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1934   a->cowners    = a->rowners + a->size + 2;
1935   a->rowners_bs = a->cowners + a->size + 2;
1936   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1937   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1938   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
1939   if (oldmat->colmap) {
1940 #if defined (PETSC_USE_CTABLE)
1941     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1942 #else
1943     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1944     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
1945     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
1946 #endif
1947   } else a->colmap = 0;
1948   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1949     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
1950     PetscLogObjectMemory(mat,len*sizeof(int));
1951     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
1952   } else a->garray = 0;
1953 
1954   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1955   PetscLogObjectParent(mat,a->lvec);
1956   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1957 
1958   PetscLogObjectParent(mat,a->Mvctx);
1959   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1960   PetscLogObjectParent(mat,a->A);
1961   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1962   PetscLogObjectParent(mat,a->B);
1963   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1964   *newmat = mat;
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #include "petscsys.h"
1969 
1970 EXTERN_C_BEGIN
1971 #undef __FUNCT__
1972 #define __FUNCT__ "MatLoad_MPISBAIJ"
1973 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1974 {
1975   Mat          A;
1976   int          i,nz,ierr,j,rstart,rend,fd;
1977   PetscScalar  *vals,*buf;
1978   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1979   MPI_Status   status;
1980   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
1981   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
1982   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
1983   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
1984   int          dcount,kmax,k,nzcount,tmp;
1985 
1986   PetscFunctionBegin;
1987   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1988 
1989   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1990   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1991   if (!rank) {
1992     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1993     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1994     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1995     if (header[3] < 0) {
1996       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
1997     }
1998   }
1999 
2000   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2001   M = header[1]; N = header[2];
2002 
2003   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2004 
2005   /*
2006      This code adds extra rows to make sure the number of rows is
2007      divisible by the blocksize
2008   */
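  /* For example, with M = 10 and bs = 4: Mbs = 2 and extra_rows = 4 - 10 + 8 = 2,
     so Mbs is bumped to 3; the padded rows each receive a single unit entry on the
     diagonal further below */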
2009   Mbs        = M/bs;
2010   extra_rows = bs - M + bs*(Mbs);
2011   if (extra_rows == bs) extra_rows = 0;
2012   else                  Mbs++;
2013   if (extra_rows && !rank) {
2014     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2015   }
2016 
2017   /* determine ownership of all rows */
2018   mbs        = Mbs/size + ((Mbs % size) > rank);
2019   m          = mbs*bs;
2020   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2021   browners   = rowners + size + 1;
2022   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2023   rowners[0] = 0;
2024   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2025   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2026   rstart = rowners[rank];
2027   rend   = rowners[rank+1];
2028 
2029   /* distribute row lengths to all processors */
2030   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2031   if (!rank) {
2032     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2033     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2034     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2035     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2036     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2037     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2038     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2039   } else {
2040     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2041   }
2042 
2043   if (!rank) {   /* procs[0] */
2044     /* calculate the number of nonzeros on each processor */
2045     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2046     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2047     for (i=0; i<size; i++) {
2048       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2049         procsnz[i] += rowlengths[j];
2050       }
2051     }
2052     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2053 
2054     /* determine max buffer needed and allocate it */
2055     maxnz = 0;
2056     for (i=0; i<size; i++) {
2057       maxnz = PetscMax(maxnz,procsnz[i]);
2058     }
2059     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2060 
2061     /* read in my part of the matrix column indices  */
2062     nz     = procsnz[0];
2063     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2064     mycols = ibuf;
2065     if (size == 1)  nz -= extra_rows;
2066     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2067     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2068 
2069     /* read in everyone else's portion (except the last proc's) and ship it off */
2070     for (i=1; i<size-1; i++) {
2071       nz   = procsnz[i];
2072       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2073       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2074     }
2075     /* read in the stuff for the last proc */
2076     if (size != 1) {
2077       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2078       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2079       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2080       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2081     }
2082     ierr = PetscFree(cols);CHKERRQ(ierr);
2083   } else {  /* procs[i], i>0 */
2084     /* determine buffer space needed for message */
2085     nz = 0;
2086     for (i=0; i<m; i++) {
2087       nz += locrowlens[i];
2088     }
2089     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2090     mycols = ibuf;
2091     /* receive message of column indices*/
2092     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2093     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2094     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2095   }
2096 
2097   /* loop over local rows, determining number of off diagonal entries */
2098   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2099   odlens   = dlens + (rend-rstart);
2100   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2101   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2102   masked1  = mask    + Mbs;
2103   masked2  = masked1 + Mbs;
2104   rowcount = 0; nzcount = 0;
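  /* mask[] flags block columns already seen in the current block row so each block is
     counted once; masked1[] and masked2[] record which flags were set (diagonal and
     off-diagonal block columns respectively) so they can be cleared before the next row */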
2105   for (i=0; i<mbs; i++) {
2106     dcount  = 0;
2107     odcount = 0;
2108     for (j=0; j<bs; j++) {
2109       kmax = locrowlens[rowcount];
2110       for (k=0; k<kmax; k++) {
2111         tmp = mycols[nzcount++]/bs; /* block col. index */
2112         if (!mask[tmp]) {
2113           mask[tmp] = 1;
2114           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2115           else masked1[dcount++] = tmp; /* entry in diag portion */
2116         }
2117       }
2118       rowcount++;
2119     }
2120 
2121     dlens[i]  = dcount;  /* d_nzz[i] */
2122     odlens[i] = odcount; /* o_nzz[i] */
2123 
2124     /* zero out the mask elements we set */
2125     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2126     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2127   }
2128 
2129   /* create our matrix */
2130   ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat);
2131   CHKERRQ(ierr);
2132   A = *newmat;
2133   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2134 
2135   if (!rank) {
2136     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2137     /* read in my part of the matrix numerical values  */
2138     nz = procsnz[0];
2139     vals = buf;
2140     mycols = ibuf;
2141     if (size == 1)  nz -= extra_rows;
2142     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2143     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2144 
2145     /* insert into matrix */
2146     jj      = rstart*bs;
2147     for (i=0; i<m; i++) {
2148       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2149       mycols += locrowlens[i];
2150       vals   += locrowlens[i];
2151       jj++;
2152     }
2153 
2154     /* read in other processors (except the last one) and ship out */
2155     for (i=1; i<size-1; i++) {
2156       nz   = procsnz[i];
2157       vals = buf;
2158       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2159       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2160     }
2161     /* the last proc */
2162     if (size != 1){
2163       nz   = procsnz[size-1] - extra_rows;
2164       vals = buf;
2165       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2166       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2167       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2168     }
2169     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2170 
2171   } else {
2172     /* receive numeric values */
2173     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2174 
2175     /* receive message of values*/
2176     vals   = buf;
2177     mycols = ibuf;
2178     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2179     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2180     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2181 
2182     /* insert into matrix */
2183     jj      = rstart*bs;
2184     for (i=0; i<m; i++) {
2185       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2186       mycols += locrowlens[i];
2187       vals   += locrowlens[i];
2188       jj++;
2189     }
2190   }
2191 
2192   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2193   ierr = PetscFree(buf);CHKERRQ(ierr);
2194   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2195   ierr = PetscFree(rowners);CHKERRQ(ierr);
2196   ierr = PetscFree(dlens);CHKERRQ(ierr);
2197   ierr = PetscFree(mask);CHKERRQ(ierr);
2198   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2199   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2200   PetscFunctionReturn(0);
2201 }
2202 EXTERN_C_END
2203 
2204 #undef __FUNCT__
2205 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2206 /*@
2207    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2208 
2209    Input Parameters:
2210 .  mat  - the matrix
2211 .  fact - factor
2212 
2213    Collective on Mat
2214 
2215    Level: advanced
2216 
2217   Notes:
2218    This can also be set by the command line option: -mat_use_hash_table fact
2219 
2220 .keywords: matrix, hashtable, factor, HT
2221 
2222 .seealso: MatSetOption()
2223 @*/
2224 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2225 {
2226   PetscFunctionBegin;
2227   SETERRQ(1,"Function not yet written for SBAIJ format");
2228   /* PetscFunctionReturn(0); */
2229 }
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2233 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2234 {
2235   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2236   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2237   PetscReal    atmp;
2238   PetscReal    *work,*svalues,*rvalues;
2239   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2240   int          rank,size,*rowners_bs,dest,count,source;
2241   PetscScalar  *va;
2242   MatScalar    *ba;
2243   MPI_Status   stat;
2244 
2245   PetscFunctionBegin;
2246   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2247   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2248 
2249   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
2250   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
2251 
2252   bs   = a->bs;
2253   mbs  = a->mbs;
2254   Mbs  = a->Mbs;
2255   ba   = b->a;
2256   bi   = b->i;
2257   bj   = b->j;
2258   /*
2259   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2260   PetscSynchronizedFlush(PETSC_COMM_WORLD);
2261   */
2262 
2263   /* find ownerships */
2264   rowners_bs = a->rowners_bs;
2265   /*
2266   if (!rank){
2267     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2268   }
2269   */
2270 
2271   /* each proc creates an array to be distributed */
2272   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2273   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2274 
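  /* work[] accumulates, indexed by global row, the largest magnitude this process sees
     in its B (off-diagonal) part; by symmetry those entries also belong to rows owned
     by later processes, so the relevant slices of work[] are shipped to their owners
     below and merged into the local result there */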
2275   /* row_max for B */
2276   if (rank != size-1){
2277     for (i=0; i<mbs; i++) {
2278       ncols = bi[1] - bi[0]; bi++;
2279       brow  = bs*i;
2280       for (j=0; j<ncols; j++){
2281         bcol = bs*(*bj);
2282         for (kcol=0; kcol<bs; kcol++){
2283           col = bcol + kcol;                 /* local col index */
2284           col += rowners_bs[rank+1];      /* global col index */
2285           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2286           for (krow=0; krow<bs; krow++){
2287             atmp = PetscAbsScalar(*ba); ba++;
2288             row = brow + krow;    /* local row index */
2289             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2290             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2291             if (work[col] < atmp) work[col] = atmp;
2292           }
2293         }
2294         bj++;
2295       }
2296     }
2297     /*
2298       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2299       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2300       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2301       */
2302 
2303     /* send values to its owners */
2304     for (dest=rank+1; dest<size; dest++){
2305       svalues = work + rowners_bs[dest];
2306       count   = rowners_bs[dest+1]-rowners_bs[dest];
2307       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PETSC_COMM_WORLD);CHKERRQ(ierr);
2308       /*
2309       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2310       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2311       */
2312     }
2313   }
2314 
2315   /* receive values */
2316   if (rank){
2317     rvalues = work;
2318     count   = rowners_bs[rank+1]-rowners_bs[rank];
2319     for (source=0; source<rank; source++){
2320       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);CHKERRQ(ierr);
2321       /* process values */
2322       for (i=0; i<count; i++){
2323         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2324       }
2325       /*
2326       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2327       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2328       */
2329     }
2330   }
2331 
2332   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2333   ierr = PetscFree(work);CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatRelax_MPISBAIJ"
2339 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx)
2340 {
2341   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2342   int            ierr;
2343   PetscScalar    mone=-1.0;
2344   Vec            lvec1,bb1;
2345   MatSORType     lflg=SOR_LOCAL_SYMMETRIC_SWEEP;
2346 
2347   PetscFunctionBegin;
2348 
2349   if (mat->bs > 1)
2350     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2351 
2352   if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2353     ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,1,xx);CHKERRQ(ierr);
2354     its--;
2355   }
2356 
2357   ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2358   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
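  /* Each sweep below removes the off-processor coupling from the right-hand side before
     a local symmetric sweep with the diagonal block A: B^T*xx (the implicit lower
     triangle) is computed locally and scattered back, with ADD_VALUES, to the owning
     processes, and B*lvec (the stored upper triangle times the ghost values of xx) is
     subtracted locally, giving bb1 = bb - (off-processor part of the matrix)*xx */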
2359   while (its--){
2360     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2361 
2362     /* lower diagonal part: bb1 = bb - B^T*xx */
2363     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2364     ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2365 
2366     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2367     ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2368     ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2369 
2370     /* upper diagonal part: bb1 = bb1 - B*x */
2371     ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2372     ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2373 
2374     ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2375 
2376     /* diagonal part */
2377     ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,lflg,fshift,1,xx);CHKERRQ(ierr);
2378 
2379   }
2380   ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2381   ierr = VecDestroy(bb1);CHKERRQ(ierr);
2382   PetscFunctionReturn(0);
2383 }
2384 
2385