xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 3b2db978374f4bd96835d439ab1dbc183cd01db2)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 extern int DisAssemble_MPISBAIJ(Mat);
11 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
12 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
13 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *);
14 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
15 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
18 extern int MatPrintHelp_SeqSBAIJ(Mat);
19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the ..._MatScalar() functions do not exist;
26    the macros below simply alias them to the regular routines. Otherwise the
27    ..._MatScalar() routines take matrix elements already in single precision
28    (MatScalar) and insert them into the single precision data structures, while
29    MatSetValuesBlocked_MPISBAIJ() converts the PetscScalar entries to single
30    precision and then calls ..._MatScalar() to store them. */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 int MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
51   int          ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 int MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
66   int          ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 /*
76      Local utility routine that creates a mapping from the global column
77    number to the local number in the off-diagonal part of the local
78    storage of the matrix.  This is done in a non-scalable way since the
79    length of colmap equals the global matrix length.
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private"
83 static int CreateColmap_MPISBAIJ_Private(Mat mat)
84 {
85   PetscFunctionBegin;
86   SETERRQ(1,"Function not yet written for SBAIJ format");
87   /* PetscFunctionReturn(0); */
88 }
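/*
   A rough sketch of what this routine could look like, modeled on the MPIBAIJ
   version (illustration only, not the actual implementation; field names such
   as Nbs and garray are assumed to match Mat_MPISBAIJ): allocate colmap with
   one slot per global block column and record, for each off-diagonal block
   column listed in garray, its local position plus one (so that 0 can mean
   "not present").

       ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
       ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
       for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;

   The PETSC_USE_CTABLE branch would instead add the same (key,value) pairs to
   a PetscTable to avoid the O(Nbs) storage.
*/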
89 
90 #define CHUNKSIZE  10
91 
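/*
   The two macros below insert a single scalar entry (row,col,value) into the
   local diagonal block A (upper triangle, SBAIJ storage) and into the
   off-diagonal block B (BAIJ storage) respectively.  In outline: locate the
   block row, narrow the search for the block column with a short bisection,
   then scan linearly; if the block column already exists, add or overwrite
   the value, otherwise shift the later entries of the row up and insert it,
   enlarging the row storage by CHUNKSIZE blocks when the row is full.
*/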
92 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
93 { \
94  \
95     brow = row/bs;  \
96     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
97     rmax = aimax[brow]; nrow = ailen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto a_noinsert; \
113         } \
114       } \
115       if (a->nonew == 1) goto a_noinsert; \
116       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
117       if (nrow >= rmax) { \
118         /* there is no extra room in row, therefore enlarge */ \
119         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
120         MatScalar *new_a; \
121  \
122         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
123  \
124         /* malloc new storage space */ \
125         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
126         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
127         new_j = (int*)(new_a + bs2*new_nz); \
128         new_i = new_j + new_nz; \
129  \
130         /* copy over old data into new slots */ \
131         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
132         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
133         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
134         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
135         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
136         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
137         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
138         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
139                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
140         /* free up old matrix storage */ \
141         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
142         if (!a->singlemalloc) { \
143           ierr = PetscFree(a->i);CHKERRQ(ierr); \
144           ierr = PetscFree(a->j);CHKERRQ(ierr);\
145         } \
146         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
147         a->singlemalloc = PETSC_TRUE; \
148  \
149         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
150         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
151         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
152         a->s_maxnz += bs2*CHUNKSIZE; \
153         a->reallocs++; \
154         a->s_nz++; \
155       } \
156       N = nrow++ - 1;  \
157       /* shift up all the later entries in this row */ \
158       for (ii=N; ii>=_i; ii--) { \
159         rp[ii+1] = rp[ii]; \
160         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
161       } \
162       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
163       rp[_i]                      = bcol;  \
164       ap[bs2*_i + bs*cidx + ridx] = value;  \
165       a_noinsert:; \
166     ailen[brow] = nrow; \
167 }
168 #ifndef MatSetValues_SeqSBAIJ_B_Private
169 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
170 { \
171     brow = row/bs;  \
172     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
173     rmax = bimax[brow]; nrow = bilen[brow]; \
174       bcol = col/bs; \
175       ridx = row % bs; cidx = col % bs; \
176       low = 0; high = nrow; \
177       while (high-low > 3) { \
178         t = (low+high)/2; \
179         if (rp[t] > bcol) high = t; \
180         else              low  = t; \
181       } \
182       for (_i=low; _i<high; _i++) { \
183         if (rp[_i] > bcol) break; \
184         if (rp[_i] == bcol) { \
185           bap  = ap +  bs2*_i + bs*cidx + ridx; \
186           if (addv == ADD_VALUES) *bap += value;  \
187           else                    *bap  = value;  \
188           goto b_noinsert; \
189         } \
190       } \
191       if (b->nonew == 1) goto b_noinsert; \
192       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
193       if (nrow >= rmax) { \
194         /* there is no extra room in row, therefore enlarge */ \
195         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
196         MatScalar *new_a; \
197  \
198         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
199  \
200         /* malloc new storage space */ \
201         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
202         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
203         new_j = (int*)(new_a + bs2*new_nz); \
204         new_i = new_j + new_nz; \
205  \
206         /* copy over old data into new slots */ \
207         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
208         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
209         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
210         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
211         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
212         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
213         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
214         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
215                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
216         /* free up old matrix storage */ \
217         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
218         if (!b->singlemalloc) { \
219           ierr = PetscFree(b->i);CHKERRQ(ierr); \
220           ierr = PetscFree(b->j);CHKERRQ(ierr); \
221         } \
222         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
223         b->singlemalloc = PETSC_TRUE; \
224  \
225         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
226         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
227         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
228         b->maxnz += bs2*CHUNKSIZE; \
229         b->reallocs++; \
230         b->nz++; \
231       } \
232       N = nrow++ - 1;  \
233       /* shift up all the later entries in this row */ \
234       for (ii=N; ii>=_i; ii--) { \
235         rp[ii+1] = rp[ii]; \
236         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
237       } \
238       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
239       rp[_i]                      = bcol;  \
240       ap[bs2*_i + bs*cidx + ridx] = value;  \
241       b_noinsert:; \
242     bilen[brow] = nrow; \
243 }
244 #endif
245 
246 #if defined(PETSC_USE_MAT_SINGLE)
247 #undef __FUNCT__
248 #define __FUNCT__ "MatSetValues_MPISBAIJ"
249 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
250 {
251   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
252   int          ierr,i,N = m*n;
253   MatScalar    *vsingle;
254 
255   PetscFunctionBegin;
256   if (N > b->setvalueslen) {
257     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
258     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
259     b->setvalueslen  = N;
260   }
261   vsingle = b->setvaluescopy;
262 
263   for (i=0; i<N; i++) {
264     vsingle[i] = v[i];
265   }
266   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
272 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
273 {
274   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
275   int         ierr,i,N = m*n*b->bs2;
276   MatScalar   *vsingle;
277 
278   PetscFunctionBegin;
279   if (N > b->setvalueslen) {
280     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
281     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
282     b->setvalueslen  = N;
283   }
284   vsingle = b->setvaluescopy;
285   for (i=0; i<N; i++) {
286     vsingle[i] = v[i];
287   }
288   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
294 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
295 {
296   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
297   int         ierr,i,N = m*n;
298   MatScalar   *vsingle;
299 
300   PetscFunctionBegin;
301   SETERRQ(1,"Function not yet written for SBAIJ format");
302   /* PetscFunctionReturn(0); */
303 }
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
307 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
308 {
309   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
310   int         ierr,i,N = m*n*b->bs2;
311   MatScalar   *vsingle;
312 
313   PetscFunctionBegin;
314   SETERRQ(1,"Function not yet written for SBAIJ format");
315   /* PetscFunctionReturn(0); */
316 }
317 #endif
318 
319 /* Only add/insert a(i,j) with i<=j (blocks).
320    Any a(i,j) with i>j input by the user is ignored.
321 */
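/*
   A minimal usage sketch (illustration only, not part of this file): for an
   MPISBAIJ matrix only the upper triangular entries need to be set; anything
   whose block row index exceeds its block column index is silently dropped by
   the routine below.  Assuming a square matrix with block size 1, global size
   N, and local ownership range [rstart,rend), with value_of_entry() a
   hypothetical helper:

       for (i=rstart; i<rend; i++) {
         for (j=i; j<N; j++) {                      (only i <= j is inserted)
           v    = value_of_entry(i,j);
           ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
         }
       }
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/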
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
324 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
325 {
326   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
327   MatScalar    value;
328   PetscTruth   roworiented = baij->roworiented;
329   int          ierr,i,j,row,col;
330   int          rstart_orig=baij->rstart_bs;
331   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
332   int          cend_orig=baij->cend_bs,bs=baij->bs;
333 
334   /* Some Variables required in the macro */
335   Mat          A = baij->A;
336   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
337   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
338   MatScalar    *aa=a->a;
339 
340   Mat          B = baij->B;
341   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
342   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
343   MatScalar    *ba=b->a;
344 
345   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
346   int          low,high,t,ridx,cidx,bs2=a->bs2;
347   MatScalar    *ap,*bap;
348 
349   /* for stash */
350   int          n_loc, *in_loc=0;
351   MatScalar    *v_loc=0;
352 
353   PetscFunctionBegin;
354 
355   if(!baij->donotstash){
356     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
357     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
358   }
359 
360   for (i=0; i<m; i++) {
361     if (im[i] < 0) continue;
362 #if defined(PETSC_USE_BOPT_g)
363     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
364 #endif
365     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
366       row = im[i] - rstart_orig;              /* local row index */
367       for (j=0; j<n; j++) {
368         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
369         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
370           col = in[j] - cstart_orig;          /* local col index */
371           brow = row/bs; bcol = col/bs;
372           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
373           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
374           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
375           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
376         } else if (in[j] < 0) continue;
377 #if defined(PETSC_USE_BOPT_g)
378         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");}
379 #endif
380         else {  /* off-diag entry (B) */
381           if (mat->was_assembled) {
382             if (!baij->colmap) {
383               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
384             }
385 #if defined (PETSC_USE_CTABLE)
386             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
387             col  = col - 1;
388 #else
389             col = baij->colmap[in[j]/bs] - 1;
390 #endif
391             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
392               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
393               col =  in[j];
394               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
395               B = baij->B;
396               b = (Mat_SeqBAIJ*)(B)->data;
397               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
398               ba=b->a;
399             } else col += in[j]%bs;
400           } else col = in[j];
401           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
402           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
403           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
404         }
405       }
406     } else {  /* off processor entry */
407       if (!baij->donotstash) {
408         n_loc = 0;
409         for (j=0; j<n; j++){
410           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
411           in_loc[n_loc] = in[j];
412           if (roworiented) {
413             v_loc[n_loc] = v[i*n+j];
414           } else {
415             v_loc[n_loc] = v[j*m+i];
416           }
417           n_loc++;
418         }
419         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
420       }
421     }
422   }
423 
424   if(!baij->donotstash){
425     ierr = PetscFree(in_loc);CHKERRQ(ierr);
426     ierr = PetscFree(v_loc);CHKERRQ(ierr);
427   }
428   PetscFunctionReturn(0);
429 }
430 
431 #undef __FUNCT__
432 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
433 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
434 {
435   PetscFunctionBegin;
436   SETERRQ(1,"Function not yet written for SBAIJ format");
437   /* PetscFunctionReturn(0); */
438 }
439 
440 #define HASH_KEY 0.6180339887
441 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
442 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
443 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
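/*
   The HASH macro above is the classical multiplicative (golden-ratio) hash:
   the key is multiplied by 0.6180339887, the integer part is dropped, and the
   fractional part is scaled by the table size.  A small worked example
   (values rounded): HASH(128,37,tmp) gives tmp = 37*0.6180339887 = 22.867...,
   fractional part 0.867..., and 128*0.867... truncates to 111.
*/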
444 #undef __FUNCT__
445 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
446 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
447 {
448   PetscFunctionBegin;
449   SETERRQ(1,"Function not yet written for SBAIJ format");
450   /* PetscFunctionReturn(0); */
451 }
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
455 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
456 {
457   PetscFunctionBegin;
458   SETERRQ(1,"Function not yet written for SBAIJ format");
459   /* PetscFunctionReturn(0); */
460 }
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatGetValues_MPISBAIJ"
464 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
465 {
466   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
467   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
468   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
469 
470   PetscFunctionBegin;
471   for (i=0; i<m; i++) {
472     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
473     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
474     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
475       row = idxm[i] - bsrstart;
476       for (j=0; j<n; j++) {
477         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
478         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
479         if (idxn[j] >= bscstart && idxn[j] < bscend){
480           col = idxn[j] - bscstart;
481           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
482         } else {
483           if (!baij->colmap) {
484             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
485           }
486 #if defined (PETSC_USE_CTABLE)
487           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
488           data --;
489 #else
490           data = baij->colmap[idxn[j]/bs]-1;
491 #endif
492           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
493           else {
494             col  = data + idxn[j]%bs;
495             ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
496           }
497         }
498       }
499     } else {
500       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
501     }
502   }
503  PetscFunctionReturn(0);
504 }
505 
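/*
   Note on the Frobenius norm computed below: each process stores only the
   upper triangle of its diagonal block in A (its internal symmetry is handled
   by the sequential SBAIJ norm routine) and the strictly off-diagonal part in
   B, whose mirror image (the subdiagonal part) is not stored on any process.
   Hence the squared Frobenius norm of the whole matrix is the sum over
   processes of ||A||_F^2 + 2*||B||_F^2, which is what sqrt(sum[0] + 2*sum[1])
   evaluates.
*/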
506 #undef __FUNCT__
507 #define __FUNCT__ "MatNorm_MPISBAIJ"
508 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
509 {
510   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
511   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
512   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
513   int        ierr;
514   PetscReal  sum[2],*lnorm2;
515 
516   PetscFunctionBegin;
517   if (baij->size == 1) {
518     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
519   } else {
520     if (type == NORM_FROBENIUS) {
521       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
522       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
523       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
524       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
525       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
526       /*
527       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
528       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
529       */
530       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
531       /*
532       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
533       PetscSynchronizedFlush(PETSC_COMM_WORLD); */
534 
535       *norm = sqrt(sum[0] + 2*sum[1]);
536       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
537     } else {
538       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
539     }
540   }
541   PetscFunctionReturn(0);
542 }
543 
544 /*
545   Creates the hash table and fills it in.
546   This table is created only once.
547   If new entries need to be added to the matrix
548   then the hash table has to be destroyed and
549   recreated.
550 */
551 #undef __FUNCT__
552 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
553 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
554 {
555   PetscFunctionBegin;
556   SETERRQ(1,"Function not yet written for SBAIJ format");
557   /* PetscFunctionReturn(0); */
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
562 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
563 {
564   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
565   int         ierr,nstash,reallocs;
566   InsertMode  addv;
567 
568   PetscFunctionBegin;
569   if (baij->donotstash) {
570     PetscFunctionReturn(0);
571   }
572 
573   /* make sure all processors are either in INSERTMODE or ADDMODE */
574   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
575   if (addv == (ADD_VALUES|INSERT_VALUES)) {
576     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
577   }
578   mat->insertmode = addv; /* in case this processor had no cache */
579 
580   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
581   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
582   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
583   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
584   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
585   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
591 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
592 {
593   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
594   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
595   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
596   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
597   int         *row,*col,other_disassembled;
598   PetscTruth  r1,r2,r3;
599   MatScalar   *val;
600   InsertMode  addv = mat->insertmode;
601 #if defined(PETSC_HAVE_SPOOLES)
602   PetscTruth  flag;
603 #endif
604 
605   PetscFunctionBegin;
606 
607   if (!baij->donotstash) {
608     while (1) {
609       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
610       /*
611       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
612       PetscSynchronizedFlush(PETSC_COMM_WORLD);
613       */
614       if (!flg) break;
615 
616       for (i=0; i<n;) {
617         /* Now identify the consecutive vals belonging to the same row */
618         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
619         if (j < n) ncols = j-i;
620         else       ncols = n-i;
621         /* Now assemble all these values with a single function call */
622         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
623         i = j;
624       }
625     }
626     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
627     /* Now process the block-stash. Since the values are stashed column-oriented,
628        temporarily set the roworiented flags to PETSC_FALSE, and restore the
629        original flags after the MatSetValues() calls */
630     r1 = baij->roworiented;
631     r2 = a->roworiented;
632     r3 = b->roworiented;
633     baij->roworiented = PETSC_FALSE;
634     a->roworiented    = PETSC_FALSE;
635     b->roworiented    = PETSC_FALSE;
636     while (1) {
637       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
638       if (!flg) break;
639 
640       for (i=0; i<n;) {
641         /* Now identify the consecutive vals belonging to the same row */
642         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
643         if (j < n) ncols = j-i;
644         else       ncols = n-i;
645         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
646         i = j;
647       }
648     }
649     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
650     baij->roworiented = r1;
651     a->roworiented    = r2;
652     b->roworiented    = r3;
653   }
654 
655   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
656   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
657 
658   /* determine if any processor has disassembled, if so we must
659      also disassemble ourselves, in order that we may reassemble. */
660   /*
661      if nonzero structure of submatrix B cannot change then we know that
662      no processor disassembled thus we can skip this stuff
663   */
664   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
665     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
666     if (mat->was_assembled && !other_disassembled) {
667       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
668     }
669   }
670 
671   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
672     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
673   }
674   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
675   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
676 
677 #if defined(PETSC_USE_BOPT_g)
678   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
679     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
680     baij->ht_total_ct  = 0;
681     baij->ht_insert_ct = 0;
682   }
683 #endif
684   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
685     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
686     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
687     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
688   }
689 
690   if (baij->rowvalues) {
691     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
692     baij->rowvalues = 0;
693   }
694 
695 #if defined(PETSC_HAVE_SPOOLES)
696   ierr = PetscOptionsHasName(mat,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
697   if (flag) { ierr = MatUseSpooles_MPISBAIJ(mat);CHKERRQ(ierr); }
698 #endif
699   PetscFunctionReturn(0);
700 }
701 
702 #undef __FUNCT__
703 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
704 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
705 {
706   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
707   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
708   PetscTruth        isascii,isdraw;
709   PetscViewer       sviewer;
710   PetscViewerFormat format;
711 
712   PetscFunctionBegin;
713   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
714   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
715   if (isascii) {
716     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
717     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
718       MatInfo info;
719       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
720       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
721       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
722               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
723               baij->bs,(int)info.memory);CHKERRQ(ierr);
724       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
725       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
726       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
727       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
728       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
729       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
730       PetscFunctionReturn(0);
731     } else if (format == PETSC_VIEWER_ASCII_INFO) {
732       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
733       PetscFunctionReturn(0);
734     }
735   }
736 
737   if (isdraw) {
738     PetscDraw       draw;
739     PetscTruth isnull;
740     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
741     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
742   }
743 
744   if (size == 1) {
745     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
746     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
747   } else {
748     /* assemble the entire matrix onto first processor. */
749     Mat         A;
750     Mat_SeqSBAIJ *Aloc;
751     Mat_SeqBAIJ *Bloc;
752     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
753     MatScalar   *a;
754 
755     if (!rank) {
756       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
757     } else {
758       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
759     }
760     PetscLogObjectParent(mat,A);
761 
762     /* copy over the A part */
763     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
764     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
765     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
766 
767     for (i=0; i<mbs; i++) {
768       rvals[0] = bs*(baij->rstart + i);
769       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
770       for (j=ai[i]; j<ai[i+1]; j++) {
771         col = (baij->cstart+aj[j])*bs;
772         for (k=0; k<bs; k++) {
773           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
774           col++; a += bs;
775         }
776       }
777     }
778     /* copy over the B part */
779     Bloc = (Mat_SeqBAIJ*)baij->B->data;
780     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
781     for (i=0; i<mbs; i++) {
782       rvals[0] = bs*(baij->rstart + i);
783       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
784       for (j=ai[i]; j<ai[i+1]; j++) {
785         col = baij->garray[aj[j]]*bs;
786         for (k=0; k<bs; k++) {
787           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
788           col++; a += bs;
789         }
790       }
791     }
792     ierr = PetscFree(rvals);CHKERRQ(ierr);
793     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
794     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
795     /*
796        Everyone has to participate in these calls to draw the matrix since the graphics waits are
797        synchronized across all processors that share the PetscDraw object
798     */
799     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
800     if (!rank) {
801       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
802       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
803     }
804     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
805     ierr = MatDestroy(A);CHKERRQ(ierr);
806   }
807   PetscFunctionReturn(0);
808 }
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatView_MPISBAIJ"
812 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
813 {
814   int        ierr;
815   PetscTruth isascii,isdraw,issocket,isbinary;
816 
817   PetscFunctionBegin;
818   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
819   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
820   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
821   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
822   if (isascii || isdraw || issocket || isbinary) {
823     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
824   } else {
825     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
826   }
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNCT__
831 #define __FUNCT__ "MatDestroy_MPISBAIJ"
832 int MatDestroy_MPISBAIJ(Mat mat)
833 {
834   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
835   int         ierr;
836 
837   PetscFunctionBegin;
838 #if defined(PETSC_USE_LOG)
839   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
840 #endif
841   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
842   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
843   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
844   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
845   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
846 #if defined (PETSC_USE_CTABLE)
847   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
848 #else
849   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
850 #endif
851   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
852   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
853   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
854   if (baij->slvec0) {
855     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
856     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
857   }
858   if (baij->slvec1) {
859     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
860     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
861     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
862   }
863   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
864   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
865   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
866   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
867 #if defined(PETSC_USE_MAT_SINGLE)
868   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
869 #endif
870   ierr = PetscFree(baij);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
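/*
   One-communication symmetric multiply y = A*x.  Only the upper triangle is
   stored, so each process (roughly) does the following: slvec1a = A_loc*x_loc
   (diagonal block; its internal symmetry is handled by the sequential SBAIJ
   multiply), slvec0b = B^T*x_loc (contributions this process owes to rows
   owned by other processes).  A single forward scatter-add of
   slvec0 = [x_loc, slvec0b] into slvec1 = [slvec1a, slvec1b] then both
   delivers the needed ghost values of x into slvec1b and accumulates the
   remote B^T*x contributions into slvec1a; finally y = B*slvec1b + slvec1a.
*/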
874 #undef __FUNCT__
875 #define __FUNCT__ "MatMult_MPISBAIJ"
876 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
877 {
878   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
879   int         ierr,nt,mbs=a->mbs,bs=a->bs;
880   PetscScalar *x,*from,zero=0.0;
881 
882   PetscFunctionBegin;
883   /*
884   PetscSynchronizedPrintf(PETSC_COMM_WORLD," _1comm is called ...\n");
885   PetscSynchronizedFlush(PETSC_COMM_WORLD);
886   */
887   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
888   if (nt != A->n) {
889     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
890   }
891   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
892   if (nt != A->m) {
893     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
894   }
895 
896   /* diagonal part */
897   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
898   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
899 
900   /* subdiagonal part */
901   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
902 
903   /* copy x into the vec slvec0 */
904   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
905   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
906   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
907   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
908 
909   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
910   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
911   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
912 
913   /* superdiagonal part */
914   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
915 
916   PetscFunctionReturn(0);
917 }
918 
919 #undef __FUNCT__
920 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
921 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
922 {
923   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
924   int         ierr,nt;
925 
926   PetscFunctionBegin;
927   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
928   if (nt != A->n) {
929     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
930   }
931   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
932   if (nt != A->m) {
933     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
934   }
935 
936   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
937   /* do diagonal part */
938   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
939   /* do superdiagonal part */
940   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
941   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
942   /* do subdiagonal part */
943   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
944   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
945   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
946 
947   PetscFunctionReturn(0);
948 }
949 
950 #undef __FUNCT__
951 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
952 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
953 {
954   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
955   int          ierr,mbs=a->mbs,bs=a->bs;
956   PetscScalar  *x,*from,zero=0.0;
957 
958   PetscFunctionBegin;
959   /*
960   PetscSynchronizedPrintf(PETSC_COMM_WORLD," MatMultAdd is called ...\n");
961   PetscSynchronizedFlush(PETSC_COMM_WORLD);
962   */
963   /* diagonal part */
964   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
965   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
966 
967   /* subdiagonal part */
968   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
969 
970   /* copy x into the vec slvec0 */
971   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
972   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
973   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
974   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
975 
976   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
977   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
978   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
979 
980   /* superdiagonal part */
981   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
982 
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
988 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
989 {
990   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
991   int        ierr;
992 
993   PetscFunctionBegin;
994   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
995   /* do diagonal part */
996   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
997   /* do superdiagonal part */
998   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
999   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1000 
1001   /* do subdiagonal part */
1002   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1003   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1004   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1005 
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1011 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1012 {
1013   PetscFunctionBegin;
1014   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
1015   /* PetscFunctionReturn(0); */
1016 }
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1020 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1021 {
1022   PetscFunctionBegin;
1023   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
1024   /* PetscFunctionReturn(0); */
1025 }
1026 
1027 /*
1028   This only works correctly for square matrices where the subblock A->A is the
1029    diagonal block
1030 */
1031 #undef __FUNCT__
1032 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1033 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1034 {
1035   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1036   int         ierr;
1037 
1038   PetscFunctionBegin;
1039   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1040   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 #undef __FUNCT__
1045 #define __FUNCT__ "MatScale_MPISBAIJ"
1046 int MatScale_MPISBAIJ(PetscScalar *aa,Mat A)
1047 {
1048   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1049   int         ierr;
1050 
1051   PetscFunctionBegin;
1052   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1053   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1054   PetscFunctionReturn(0);
1055 }
1056 
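/*
   MatGetRow returns one (global) row of the matrix assembled from the two
   local pieces: the entries of the diagonal block A (shifted by cstart*bs to
   global column numbers) and the entries of the off-diagonal block B (mapped
   through garray to global block columns).  The merge below keeps the column
   indices increasing, assuming each of A and B already returns its row
   sorted.  Note that for SBAIJ only the stored (upper triangular) part of
   the row is returned.
*/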
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1059 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1060 {
1061   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1062   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1063   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1064   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1065   int            *cmap,*idx_p,cstart = mat->cstart;
1066 
1067   PetscFunctionBegin;
1068   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1069   mat->getrowactive = PETSC_TRUE;
1070 
1071   if (!mat->rowvalues && (idx || v)) {
1072     /*
1073         allocate enough space to hold information from the longest row.
1074     */
1075     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1076     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1077     int     max = 1,mbs = mat->mbs,tmp;
1078     for (i=0; i<mbs; i++) {
1079       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1080       if (max < tmp) { max = tmp; }
1081     }
1082     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1083     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1084   }
1085 
1086   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1087   lrow = row - brstart;  /* local row index */
1088 
1089   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1090   if (!v)   {pvA = 0; pvB = 0;}
1091   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1092   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1093   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1094   nztot = nzA + nzB;
1095 
1096   cmap  = mat->garray;
1097   if (v  || idx) {
1098     if (nztot) {
1099       /* Sort by increasing column numbers, assuming A and B already sorted */
1100       int imark = -1;
1101       if (v) {
1102         *v = v_p = mat->rowvalues;
1103         for (i=0; i<nzB; i++) {
1104           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1105           else break;
1106         }
1107         imark = i;
1108         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1109         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1110       }
1111       if (idx) {
1112         *idx = idx_p = mat->rowindices;
1113         if (imark > -1) {
1114           for (i=0; i<imark; i++) {
1115             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1116           }
1117         } else {
1118           for (i=0; i<nzB; i++) {
1119             if (cmap[cworkB[i]/bs] < cstart)
1120               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1121             else break;
1122           }
1123           imark = i;
1124         }
1125         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1126         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1127       }
1128     } else {
1129       if (idx) *idx = 0;
1130       if (v)   *v   = 0;
1131     }
1132   }
1133   *nz = nztot;
1134   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1135   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1141 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1142 {
1143   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1144 
1145   PetscFunctionBegin;
1146   if (baij->getrowactive == PETSC_FALSE) {
1147     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1148   }
1149   baij->getrowactive = PETSC_FALSE;
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1155 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1156 {
1157   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1158 
1159   PetscFunctionBegin;
1160   *bs = baij->bs;
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1166 int MatZeroEntries_MPISBAIJ(Mat A)
1167 {
1168   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1169   int         ierr;
1170 
1171   PetscFunctionBegin;
1172   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1173   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1179 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1180 {
1181   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1182   Mat         A = a->A,B = a->B;
1183   int         ierr;
1184   PetscReal   isend[5],irecv[5];
1185 
1186   PetscFunctionBegin;
1187   info->block_size     = (PetscReal)a->bs;
1188   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1189   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1190   isend[3] = info->memory;  isend[4] = info->mallocs;
1191   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1192   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1193   isend[3] += info->memory;  isend[4] += info->mallocs;
1194   if (flag == MAT_LOCAL) {
1195     info->nz_used      = isend[0];
1196     info->nz_allocated = isend[1];
1197     info->nz_unneeded  = isend[2];
1198     info->memory       = isend[3];
1199     info->mallocs      = isend[4];
1200   } else if (flag == MAT_GLOBAL_MAX) {
1201     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1202     info->nz_used      = irecv[0];
1203     info->nz_allocated = irecv[1];
1204     info->nz_unneeded  = irecv[2];
1205     info->memory       = irecv[3];
1206     info->mallocs      = irecv[4];
1207   } else if (flag == MAT_GLOBAL_SUM) {
1208     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1209     info->nz_used      = irecv[0];
1210     info->nz_allocated = irecv[1];
1211     info->nz_unneeded  = irecv[2];
1212     info->memory       = irecv[3];
1213     info->mallocs      = irecv[4];
1214   } else {
1215     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1216   }
1217   info->rows_global       = (PetscReal)matin->M;
1218   info->columns_global    = (PetscReal)matin->N;
1219   info->rows_local        = (PetscReal)matin->m;
1220   info->columns_local     = (PetscReal)matin->N;
1221   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1222   info->fill_ratio_needed = 0;
1223   info->factor_mallocs    = 0;
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1229 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1230 {
1231   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1232   int         ierr;
1233 
1234   PetscFunctionBegin;
1235   switch (op) {
1236   case MAT_NO_NEW_NONZERO_LOCATIONS:
1237   case MAT_YES_NEW_NONZERO_LOCATIONS:
1238   case MAT_COLUMNS_UNSORTED:
1239   case MAT_COLUMNS_SORTED:
1240   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1241   case MAT_KEEP_ZEROED_ROWS:
1242   case MAT_NEW_NONZERO_LOCATION_ERR:
1243     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1244     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1245     break;
1246   case MAT_ROW_ORIENTED:
1247     a->roworiented = PETSC_TRUE;
1248     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1249     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1250     break;
1251   case MAT_ROWS_SORTED:
1252   case MAT_ROWS_UNSORTED:
1253   case MAT_YES_NEW_DIAGONALS:
1254   case MAT_USE_SINGLE_PRECISION_SOLVES:
1255     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1256     break;
1257   case MAT_COLUMN_ORIENTED:
1258     a->roworiented = PETSC_FALSE;
1259     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1260     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1261     break;
1262   case MAT_IGNORE_OFF_PROC_ENTRIES:
1263     a->donotstash = PETSC_TRUE;
1264     break;
1265   case MAT_NO_NEW_DIAGONALS:
1266     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1267   case MAT_USE_HASH_TABLE:
1268     a->ht_flag = PETSC_TRUE;
1269     break;
1270   default:
1271     SETERRQ(PETSC_ERR_SUP,"unknown option");
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1278 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1279 {
1280   int ierr;
1281   PetscFunctionBegin;
1282   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
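/*
   Diagonal scaling of a symmetric matrix: to preserve symmetry the same
   vector must be used on both sides, i.e. the operation is A <- D*A*D.  The
   diagonal block is scaled on both sides locally; the off-diagonal block B
   is scaled on the left by the local part of D and on the right by the ghost
   values of D obtained from the scatter, which is overlapped with the local
   work below.
*/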
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1288 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1289 {
1290   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1291   Mat         a = baij->A,b = baij->B;
1292   int         ierr,s1,s2,s3;
1293 
1294   PetscFunctionBegin;
1295   if (ll != rr) {
1296     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same\n");
1297   }
1298   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1299   if (rr) {
1300     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1301     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1302     /* Overlap communication with computation. */
1303     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1304     /*} if (ll) { */
1305     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1306     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1307     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1308     /* } */
1309   /* scale  the diagonal block */
1310   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1311 
1312   /* if (rr) { */
1313     /* Do a scatter end and then right scale the off-diagonal block */
1314     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1315     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1316   }
1317 
1318   PetscFunctionReturn(0);
1319 }
1320 
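/*
   MatZeroRows: the rows to be zeroed may be specified by any process, so the
   global index set is first redistributed to the owning processes with a
   hand-rolled all-to-all (count rows per owner, exchange message counts and
   maximum lengths with a PetscMaxSum_Op reduction, then post nonblocking
   sends and receives).  Once every process holds the list of its own local
   rows, the sequential routines zero them, optionally re-inserting the
   diagonal.
*/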
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1323 int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag)
1324 {
1325   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1326   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1327   int            *procs,*nprocs,j,idx,nsends,*work,row;
1328   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1329   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1330   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1331   MPI_Comm       comm = A->comm;
1332   MPI_Request    *send_waits,*recv_waits;
1333   MPI_Status     recv_status,*send_status;
1334   IS             istmp;
1335   PetscTruth     found;
1336 
1337   PetscFunctionBegin;
1338   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1339   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1340 
1341   /*  first count number of contributors to each processor */
1342   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
1343   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1344   procs = nprocs + size;
1345   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
1346   for (i=0; i<N; i++) {
1347     idx   = rows[i];
1348     found = PETSC_FALSE;
1349     for (j=0; j<size; j++) {
1350       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1351         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
1352       }
1353     }
1354     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1355   }
1356   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1357 
1358   /* inform other processors of number of messages and max length*/
1359   ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
1360   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1361   nmax   = work[rank];
1362   nrecvs = work[size+rank];
1363   ierr   = PetscFree(work);CHKERRQ(ierr);
1364 
1365   /* post receives:   */
1366   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
1367   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1368   for (i=0; i<nrecvs; i++) {
1369     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1370   }
1371 
1372   /* do sends:
1373      1) starts[i] gives the starting index in svalues for stuff going to
1374      the ith processor
1375   */
1376   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
1377   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1378   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
1379   starts[0]  = 0;
1380   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1381   for (i=0; i<N; i++) {
1382     svalues[starts[owner[i]]++] = rows[i];
1383   }
1384   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1385 
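  /* the packing loop above advanced the entries of starts[], so rebuild the
     send offsets (this time including the end marker starts[size]) */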
1386   starts[0] = 0;
1387   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1388   count = 0;
1389   for (i=0; i<size; i++) {
1390     if (procs[i]) {
1391       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1392     }
1393   }
1394   ierr = PetscFree(starts);CHKERRQ(ierr);
1395 
1396   base = owners[rank]*bs;
1397 
1398   /*  wait on receives */
1399   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
1400   source = lens + nrecvs;
1401   count  = nrecvs; slen = 0;
1402   while (count) {
1403     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1404     /* unpack receives into our local space */
1405     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1406     source[imdex]  = recv_status.MPI_SOURCE;
1407     lens[imdex]    = n;
1408     slen          += n;
1409     count--;
1410   }
1411   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1412 
1413   /* move the data into the send scatter */
1414   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
1415   count = 0;
1416   for (i=0; i<nrecvs; i++) {
1417     values = rvalues + i*nmax;
1418     for (j=0; j<lens[i]; j++) {
1419       lrows[count++] = values[j] - base;
1420     }
1421   }
1422   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1423   ierr = PetscFree(lens);CHKERRQ(ierr);
1424   ierr = PetscFree(owner);CHKERRQ(ierr);
1425   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1426 
1427   /* actually zap the local rows */
1428   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1429   PetscLogObjectParent(A,istmp);
1430 
1431   /*
1432         Zero the required rows. If the "diagonal block" of the matrix
1433      is square and the user wishes to set the diagonal we use separate
1434      code so that MatSetValues() is not called for each diagonal entry, which would
1435      allocate new memory each time, causing many mallocs and slowing things down.
1436 
1437        Contributed by: Matthew Knepley
1438   */
1439   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1440   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1441   if (diag && (l->A->M == l->A->N)) {
1442     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1443   } else if (diag) {
1444     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1445     if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
1446       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1447 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1448     }
1449     for (i=0; i<slen; i++) {
1450       row  = lrows[i] + rstart_bs;
1451       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1452     }
1453     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1454     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1455   } else {
1456     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1457   }
1458 
1459   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1460   ierr = PetscFree(lrows);CHKERRQ(ierr);
1461 
1462   /* wait on sends */
1463   if (nsends) {
1464     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1465     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1466     ierr = PetscFree(send_status);CHKERRQ(ierr);
1467   }
1468   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1469   ierr = PetscFree(svalues);CHKERRQ(ierr);
1470 
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1476 int MatPrintHelp_MPISBAIJ(Mat A)
1477 {
1478   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1479   MPI_Comm    comm = A->comm;
1480   static int  called = 0;
1481   int         ierr;
1482 
1483   PetscFunctionBegin;
1484   if (!a->rank) {
1485     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1486   }
1487   if (called) {PetscFunctionReturn(0);} else called = 1;
1488   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1489   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1490   PetscFunctionReturn(0);
1491 }
1492 
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1495 int MatSetUnfactored_MPISBAIJ(Mat A)
1496 {
1497   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1498   int         ierr;
1499 
1500   PetscFunctionBegin;
1501   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1502   PetscFunctionReturn(0);
1503 }
1504 
1505 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1506 
1507 #undef __FUNCT__
1508 #define __FUNCT__ "MatEqual_MPISBAIJ"
1509 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1510 {
1511   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1512   Mat         a,b,c,d;
1513   PetscTruth  flg;
1514   int         ierr;
1515 
1516   PetscFunctionBegin;
1517   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr);
1518   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1519   a = matA->A; b = matA->B;
1520   c = matB->A; d = matB->B;
1521 
1522   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1523   if (flg == PETSC_TRUE) {
1524     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1525   }
1526   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1532 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1533 {
1534   int        ierr;
1535 
1536   PetscFunctionBegin;
1537   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 /* -------------------------------------------------------------------*/
1541 static struct _MatOps MatOps_Values = {
1542   MatSetValues_MPISBAIJ,
1543   MatGetRow_MPISBAIJ,
1544   MatRestoreRow_MPISBAIJ,
1545   MatMult_MPISBAIJ,
1546   MatMultAdd_MPISBAIJ,
1547   MatMultTranspose_MPISBAIJ,
1548   MatMultTransposeAdd_MPISBAIJ,
1549   0,
1550   0,
1551   0,
1552   0,
1553   0,
1554   0,
1555   MatRelax_MPISBAIJ,
1556   MatTranspose_MPISBAIJ,
1557   MatGetInfo_MPISBAIJ,
1558   MatEqual_MPISBAIJ,
1559   MatGetDiagonal_MPISBAIJ,
1560   MatDiagonalScale_MPISBAIJ,
1561   MatNorm_MPISBAIJ,
1562   MatAssemblyBegin_MPISBAIJ,
1563   MatAssemblyEnd_MPISBAIJ,
1564   0,
1565   MatSetOption_MPISBAIJ,
1566   MatZeroEntries_MPISBAIJ,
1567   MatZeroRows_MPISBAIJ,
1568   0,
1569   0,
1570   0,
1571   0,
1572   MatSetUpPreallocation_MPISBAIJ,
1573   0,
1574   0,
1575   0,
1576   0,
1577   MatDuplicate_MPISBAIJ,
1578   0,
1579   0,
1580   0,
1581   0,
1582   0,
1583   MatGetSubMatrices_MPISBAIJ,
1584   MatIncreaseOverlap_MPISBAIJ,
1585   MatGetValues_MPISBAIJ,
1586   0,
1587   MatPrintHelp_MPISBAIJ,
1588   MatScale_MPISBAIJ,
1589   0,
1590   0,
1591   0,
1592   MatGetBlockSize_MPISBAIJ,
1593   0,
1594   0,
1595   0,
1596   0,
1597   0,
1598   0,
1599   MatSetUnfactored_MPISBAIJ,
1600   0,
1601   MatSetValuesBlocked_MPISBAIJ,
1602   0,
1603   0,
1604   0,
1605   MatGetPetscMaps_Petsc,
1606   0,
1607   0,
1608   0,
1609   0,
1610   0,
1611   0,
1612   MatGetRowMax_MPISBAIJ};
1613 
1614 
1615 EXTERN_C_BEGIN
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1618 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1619 {
1620   PetscFunctionBegin;
1621   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1622   *iscopy = PETSC_FALSE;
1623   PetscFunctionReturn(0);
1624 }
1625 EXTERN_C_END
1626 
1627 EXTERN_C_BEGIN
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatCreate_MPISBAIJ"
1630 int MatCreate_MPISBAIJ(Mat B)
1631 {
1632   Mat_MPISBAIJ *b;
1633   int          ierr;
1634   PetscTruth   flg;
1635 
1636   PetscFunctionBegin;
1637 
1638   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1639   B->data = (void*)b;
1640   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1641   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1642 
1643   B->ops->destroy    = MatDestroy_MPISBAIJ;
1644   B->ops->view       = MatView_MPISBAIJ;
1645   B->mapping    = 0;
1646   B->factor     = 0;
1647   B->assembled  = PETSC_FALSE;
1648 
1649   B->insertmode = NOT_SET_VALUES;
1650   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1651   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1652 
1653   /* build local table of row and column ownerships */
1654   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1655   b->cowners    = b->rowners + b->size + 2;
1656   b->rowners_bs = b->cowners + b->size + 2;
1657   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1658 
1659   /* build cache for off array entries formed */
1660   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1661   b->donotstash  = PETSC_FALSE;
1662   b->colmap      = PETSC_NULL;
1663   b->garray      = PETSC_NULL;
1664   b->roworiented = PETSC_TRUE;
1665 
1666 #if defined(PETSC_USE_MAT_SINGLE)
1667   /* stuff for MatSetValues_XXX in single precision */
1668   b->setvalueslen     = 0;
1669   b->setvaluescopy    = PETSC_NULL;
1670 #endif
1671 
1672   /* stuff used in block assembly */
1673   b->barray       = 0;
1674 
1675   /* stuff used for matrix vector multiply */
1676   b->lvec         = 0;
1677   b->Mvctx        = 0;
1678   b->slvec0       = 0;
1679   b->slvec0b      = 0;
1680   b->slvec1       = 0;
1681   b->slvec1a      = 0;
1682   b->slvec1b      = 0;
1683   b->sMvctx       = 0;
1684 
1685   /* stuff for MatGetRow() */
1686   b->rowindices   = 0;
1687   b->rowvalues    = 0;
1688   b->getrowactive = PETSC_FALSE;
1689 
1690   /* hash table stuff */
1691   b->ht           = 0;
1692   b->hd           = 0;
1693   b->ht_size      = 0;
1694   b->ht_flag      = PETSC_FALSE;
1695   b->ht_fact      = 0;
1696   b->ht_total_ct  = 0;
1697   b->ht_insert_ct = 0;
1698 
1699   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1700   if (flg) {
1701     PetscReal fact = 1.39;
1702     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1703     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1704     if (fact <= 1.0) fact = 1.39;
1705     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1706     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1707   }
1708   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1709                                      "MatStoreValues_MPISBAIJ",
1710                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1711   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1712                                      "MatRetrieveValues_MPISBAIJ",
1713                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1714   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1715                                      "MatGetDiagonalBlock_MPISBAIJ",
1716                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 EXTERN_C_END
1720 
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1723 /*@C
1724    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1725    the user should preallocate the matrix storage by setting the parameters
1726    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1727    performance can be increased by more than a factor of 50.
1728 
1729    Collective on Mat
1730 
1731    Input Parameters:
1732 +  A - the matrix
1733 .  bs   - size of block
1734 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1735            submatrix  (same for all local rows)
1736 .  d_nnz - array containing the number of block nonzeros in the various block rows
1737            in the upper triangular and diagonal part of the diagonal portion of the local
1738            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave
1739            room for the diagonal entry even if it is zero.
1740 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1741            submatrix (same for all local rows).
1742 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1743            off-diagonal portion of the local submatrix (possibly different for
1744            each block row) or PETSC_NULL.
1745 
1746 
1747    Options Database Keys:
1748 +   -mat_no_unroll - uses code that does not unroll the loops in the
1749                      block calculations (much slower)
1750 -   -mat_block_size - size of the blocks to use
1751 
1752    Notes:
1753 
1754    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1755    then it must be used on all processors that share the object for that argument.
1756 
1757    Storage Information:
1758    For a square global matrix we define each processor's diagonal portion
1759    to be its local rows and the corresponding columns (a square submatrix);
1760    each processor's off-diagonal portion encompasses the remainder of the
1761    local matrix (a rectangular submatrix).
1762 
1763    The user can specify preallocated storage for the diagonal part of
1764    the local submatrix with either d_nz or d_nnz (not both).  Set
1765    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1766    memory allocation.  Likewise, specify preallocated storage for the
1767    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1768 
1769    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1770    the figure below we depict these three local rows and all columns (0-11).
1771 
1772 .vb
1773            0 1 2 3 4 5 6 7 8 9 10 11
1774           -------------------
1775    row 3  |  o o o d d d o o o o o o
1776    row 4  |  o o o d d d o o o o o o
1777    row 5  |  o o o d d d o o o o o o
1778           -------------------
1779 .ve
1780 
1781    Thus, any entries in the d locations are stored in the d (diagonal)
1782    submatrix, and any entries in the o locations are stored in the
1783    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1784    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1785 
1786    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1787    plus the diagonal part of the d matrix,
1788    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1789    In general, for PDE problems in which most nonzeros are near the diagonal,
1790    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1791    or you will get TERRIBLE performance; see the users' manual chapter on
1792    matrices.
1793 
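   A minimal usage sketch (the global size, block size, and preallocation counts
   below are illustrative assumptions only):
.vb
   Mat A;
   int ierr;
   ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);
   ierr = MatSetType(A,MATMPISBAIJ);
   ierr = MatMPISBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);
.ve
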
1794    Level: intermediate
1795 
1796 .keywords: matrix, block, aij, compressed row, sparse, parallel
1797 
1798 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1799 @*/
1800 
1801 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1802 {
1803   Mat_MPISBAIJ *b;
1804   int          ierr,i,mbs,Mbs;
1805   PetscTruth   flg2;
1806 
1807   PetscFunctionBegin;
1808   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr);
1809   if (!flg2) PetscFunctionReturn(0);
1810 
1811   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1812 
1813   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1814   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1815   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1816   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1817   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1818   if (d_nnz) {
1819     for (i=0; i<B->m/bs; i++) {
1820       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1821     }
1822   }
1823   if (o_nnz) {
1824     for (i=0; i<B->m/bs; i++) {
1825       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1826     }
1827   }
1828   B->preallocated = PETSC_TRUE;
1829   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1830   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1831   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1832   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1833 
1834   b   = (Mat_MPISBAIJ*)B->data;
1835   mbs = B->m/bs;
1836   Mbs = B->M/bs;
1837   if (mbs*bs != B->m) {
1838     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by blocksize %d",B->m,bs);
1839   }
1840 
1841   b->bs  = bs;
1842   b->bs2 = bs*bs;
1843   b->mbs = mbs;
1844   b->nbs = mbs;
1845   b->Mbs = Mbs;
1846   b->Nbs = Mbs;
1847 
1848   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1849   b->rowners[0]    = 0;
1850   for (i=2; i<=b->size; i++) {
1851     b->rowners[i] += b->rowners[i-1];
1852   }
1853   b->rstart    = b->rowners[b->rank];
1854   b->rend      = b->rowners[b->rank+1];
1855   b->cstart    = b->rstart;
1856   b->cend      = b->rend;
1857   for (i=0; i<=b->size; i++) {
1858     b->rowners_bs[i] = b->rowners[i]*bs;
1859   }
1860   b->rstart_bs = b->rstart*bs;
1861   b->rend_bs   = b->rend*bs;
1862 
1863   b->cstart_bs = b->cstart*bs;
1864   b->cend_bs   = b->cend*bs;
1865 
1866 
1867   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1868   PetscLogObjectParent(B,b->A);
1869   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1870   PetscLogObjectParent(B,b->B);
1871 
1872   /* build cache for off array entries formed */
1873   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1874 
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatCreateMPISBAIJ"
1880 /*@C
1881    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1882    (block compressed row).  For good matrix assembly performance
1883    the user should preallocate the matrix storage by setting the parameters
1884    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1885    performance can be increased by more than a factor of 50.
1886 
1887    Collective on MPI_Comm
1888 
1889    Input Parameters:
1890 +  comm - MPI communicator
1891 .  bs   - size of block
1892 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1893            This value should be the same as the local size used in creating the
1894            y vector for the matrix-vector product y = Ax.
1895 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1896            This value should be the same as the local size used in creating the
1897            x vector for the matrix-vector product y = Ax.
1898 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1899 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1900 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1901            submatrix  (same for all local rows)
1902 .  d_nnz - array containing the number of block nonzeros in the various block rows
1903            in the upper triangular and diagonal part of the diagonal portion of the local
1904            submatrix (possibly different for each block row) or PETSC_NULL.
1905            You must leave room for the diagonal entry even if it is zero.
1906 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1907            submatrix (same for all local rows).
1908 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1909            off-diagonal portion of the local submatrix (possibly different for
1910            each block row) or PETSC_NULL.
1911 
1912    Output Parameter:
1913 .  A - the matrix
1914 
1915    Options Database Keys:
1916 +   -mat_no_unroll - uses code that does not unroll the loops in the
1917                      block calculations (much slower)
1918 .   -mat_block_size - size of the blocks to use
1919 -   -mat_mpi - use the parallel matrix data structures even on one processor
1920                (defaults to using SeqSBAIJ format on one processor)
1921 
1922    Notes:
1923    The user MUST specify either the local or global matrix dimensions
1924    (possibly both).
1925 
1926    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1927    then it must be used on all processors that share the object for that argument.
1928 
1929    Storage Information:
1930    For a square global matrix we define each processor's diagonal portion
1931    to be its local rows and the corresponding columns (a square submatrix);
1932    each processor's off-diagonal portion encompasses the remainder of the
1933    local matrix (a rectangular submatrix).
1934 
1935    The user can specify preallocated storage for the diagonal part of
1936    the local submatrix with either d_nz or d_nnz (not both).  Set
1937    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1938    memory allocation.  Likewise, specify preallocated storage for the
1939    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1940 
1941    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1942    the figure below we depict these three local rows and all columns (0-11).
1943 
1944 .vb
1945            0 1 2 3 4 5 6 7 8 9 10 11
1946           -------------------
1947    row 3  |  o o o d d d o o o o o o
1948    row 4  |  o o o d d d o o o o o o
1949    row 5  |  o o o d d d o o o o o o
1950           -------------------
1951 .ve
1952 
1953    Thus, any entries in the d locations are stored in the d (diagonal)
1954    submatrix, and any entries in the o locations are stored in the
1955    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1956    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1957 
1958    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1959    plus the diagonal part of the d matrix,
1960    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1961    In general, for PDE problems in which most nonzeros are near the diagonal,
1962    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1963    or you will get TERRIBLE performance; see the users' manual chapter on
1964    matrices.
1965 
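   A minimal sketch of a typical call (the global size, block size, and
   preallocation counts are illustrative assumptions only):
.vb
   Mat A;
   int ierr;
   ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                            3,PETSC_NULL,1,PETSC_NULL,&A);
.ve
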
1966    Level: intermediate
1967 
1968 .keywords: matrix, block, aij, compressed row, sparse, parallel
1969 
1970 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1971 @*/
1972 
1973 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1974 {
1975   int ierr,size;
1976 
1977   PetscFunctionBegin;
1978   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1979   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1980   if (size > 1) {
1981     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1982     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1983   } else {
1984     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1985     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1986   }
1987   PetscFunctionReturn(0);
1988 }
1989 
1990 
1991 #undef __FUNCT__
1992 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1993 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1994 {
1995   Mat          mat;
1996   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1997   int          ierr,len=0;
1998 
1999   PetscFunctionBegin;
2000   *newmat       = 0;
2001   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2002   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
2003   mat->preallocated = PETSC_TRUE;
2004   a = (Mat_MPISBAIJ*)mat->data;
2005   a->bs  = oldmat->bs;
2006   a->bs2 = oldmat->bs2;
2007   a->mbs = oldmat->mbs;
2008   a->nbs = oldmat->nbs;
2009   a->Mbs = oldmat->Mbs;
2010   a->Nbs = oldmat->Nbs;
2011 
2012   a->rstart       = oldmat->rstart;
2013   a->rend         = oldmat->rend;
2014   a->cstart       = oldmat->cstart;
2015   a->cend         = oldmat->cend;
2016   a->size         = oldmat->size;
2017   a->rank         = oldmat->rank;
2018   a->donotstash   = oldmat->donotstash;
2019   a->roworiented  = oldmat->roworiented;
2020   a->rowindices   = 0;
2021   a->rowvalues    = 0;
2022   a->getrowactive = PETSC_FALSE;
2023   a->barray       = 0;
2024   a->rstart_bs    = oldmat->rstart_bs;
2025   a->rend_bs      = oldmat->rend_bs;
2026   a->cstart_bs    = oldmat->cstart_bs;
2027   a->cend_bs      = oldmat->cend_bs;
2028 
2029   /* hash table stuff */
2030   a->ht           = 0;
2031   a->hd           = 0;
2032   a->ht_size      = 0;
2033   a->ht_flag      = oldmat->ht_flag;
2034   a->ht_fact      = oldmat->ht_fact;
2035   a->ht_total_ct  = 0;
2036   a->ht_insert_ct = 0;
2037 
2038   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
2039   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2040   a->cowners    = a->rowners + a->size + 2;
2041   a->rowners_bs = a->cowners + a->size + 2;
2042   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2043   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2044   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2045   if (oldmat->colmap) {
2046 #if defined (PETSC_USE_CTABLE)
2047     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2048 #else
2049     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2050     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2051     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2052 #endif
2053   } else a->colmap = 0;
2054   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2055     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2056     PetscLogObjectMemory(mat,len*sizeof(int));
2057     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2058   } else a->garray = 0;
2059 
2060   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2061   PetscLogObjectParent(mat,a->lvec);
2062   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2063 
2064   PetscLogObjectParent(mat,a->Mvctx);
2065   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2066   PetscLogObjectParent(mat,a->A);
2067   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2068   PetscLogObjectParent(mat,a->B);
2069   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2070   *newmat = mat;
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 #include "petscsys.h"
2075 
2076 EXTERN_C_BEGIN
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatLoad_MPISBAIJ"
2079 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
2080 {
2081   Mat          A;
2082   int          i,nz,ierr,j,rstart,rend,fd;
2083   PetscScalar  *vals,*buf;
2084   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2085   MPI_Status   status;
2086   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2087   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2088   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2089   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2090   int          dcount,kmax,k,nzcount,tmp;
2091 
2092   PetscFunctionBegin;
2093   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2094 
2095   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2096   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2097   if (!rank) {
2098     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2099     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2100     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2101     if (header[3] < 0) {
2102       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2103     }
2104   }
2105 
2106   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2107   M = header[1]; N = header[2];
2108 
2109   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2110 
2111   /*
2112      This code adds extra rows to make sure the number of rows is
2113      divisible by the blocksize
2114   */
2115   Mbs        = M/bs;
2116   extra_rows = bs - M + bs*(Mbs);
2117   if (extra_rows == bs) extra_rows = 0;
2118   else                  Mbs++;
2119   if (extra_rows && !rank) {
2120     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2121   }
2122 
2123   /* determine ownership of all rows */
2124   mbs        = Mbs/size + ((Mbs % size) > rank);
2125   m          = mbs*bs;
2126   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2127   browners   = rowners + size + 1;
2128   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2129   rowners[0] = 0;
2130   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2131   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2132   rstart = rowners[rank];
2133   rend   = rowners[rank+1];
2134 
2135   /* distribute row lengths to all processors */
2136   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2137   if (!rank) {
2138     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2139     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2140     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2141     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2142     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2143     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2144     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2145   } else {
2146     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2147   }
2148 
2149   if (!rank) {   /* procs[0] */
2150     /* calculate the number of nonzeros on each processor */
2151     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2152     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2153     for (i=0; i<size; i++) {
2154       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2155         procsnz[i] += rowlengths[j];
2156       }
2157     }
2158     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2159 
2160     /* determine max buffer needed and allocate it */
2161     maxnz = 0;
2162     for (i=0; i<size; i++) {
2163       maxnz = PetscMax(maxnz,procsnz[i]);
2164     }
2165     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2166 
2167     /* read in my part of the matrix column indices  */
2168     nz     = procsnz[0];
2169     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2170     mycols = ibuf;
2171     if (size == 1)  nz -= extra_rows;
2172     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2173     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2174 
2175     /* read in every other processor's part (except the last) and ship it off */
2176     for (i=1; i<size-1; i++) {
2177       nz   = procsnz[i];
2178       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2179       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2180     }
2181     /* read in the stuff for the last proc */
2182     if (size != 1) {
2183       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2184       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2185       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2186       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2187     }
2188     ierr = PetscFree(cols);CHKERRQ(ierr);
2189   } else {  /* procs[i], i>0 */
2190     /* determine buffer space needed for message */
2191     nz = 0;
2192     for (i=0; i<m; i++) {
2193       nz += locrowlens[i];
2194     }
2195     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2196     mycols = ibuf;
2197     /* receive message of column indices */
2198     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2199     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2200     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2201   }
2202 
2203   /* loop over local rows, determining number of off diagonal entries */
2204   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2205   odlens   = dlens + (rend-rstart);
2206   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2207   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2208   masked1  = mask    + Mbs;
2209   masked2  = masked1 + Mbs;
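  /* mask[] marks which block columns have already been seen in the current block
     row so each block is counted only once; masked1[] and masked2[] record which
     entries of mask[] were set (diagonal and off-diagonal parts) so they can be
     cleared before the next block row */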
2210   rowcount = 0; nzcount = 0;
2211   for (i=0; i<mbs; i++) {
2212     dcount  = 0;
2213     odcount = 0;
2214     for (j=0; j<bs; j++) {
2215       kmax = locrowlens[rowcount];
2216       for (k=0; k<kmax; k++) {
2217         tmp = mycols[nzcount++]/bs; /* block col. index */
2218         if (!mask[tmp]) {
2219           mask[tmp] = 1;
2220           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2221           else masked1[dcount++] = tmp; /* entry in diag portion */
2222         }
2223       }
2224       rowcount++;
2225     }
2226 
2227     dlens[i]  = dcount;  /* d_nzz[i] */
2228     odlens[i] = odcount; /* o_nzz[i] */
2229 
2230     /* zero out the mask elements we set */
2231     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2232     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2233   }
2234 
2235   /* create our matrix */
2236   ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat);
2237   CHKERRQ(ierr);
2238   A = *newmat;
2239   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2240 
2241   if (!rank) {
2242     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2243     /* read in my part of the matrix numerical values  */
2244     nz = procsnz[0];
2245     vals = buf;
2246     mycols = ibuf;
2247     if (size == 1)  nz -= extra_rows;
2248     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2249     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2250 
2251     /* insert into matrix */
2252     jj      = rstart*bs;
2253     for (i=0; i<m; i++) {
2254       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2255       mycols += locrowlens[i];
2256       vals   += locrowlens[i];
2257       jj++;
2258     }
2259 
2260     /* read in other processors (except the last one) and ship out */
2261     for (i=1; i<size-1; i++) {
2262       nz   = procsnz[i];
2263       vals = buf;
2264       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2265       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2266     }
2267     /* the last proc */
2268     if (size != 1) {
2269       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2270       vals = buf;
2271       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2272       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2273       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2274     }
2275     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2276 
2277   } else {
2278     /* receive numeric values */
2279     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2280 
2281     /* receive message of values*/
2282     vals   = buf;
2283     mycols = ibuf;
2284     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2285     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2286     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2287 
2288     /* insert into matrix */
2289     jj      = rstart*bs;
2290     for (i=0; i<m; i++) {
2291       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2292       mycols += locrowlens[i];
2293       vals   += locrowlens[i];
2294       jj++;
2295     }
2296   }
2297 
2298   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2299   ierr = PetscFree(buf);CHKERRQ(ierr);
2300   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2301   ierr = PetscFree(rowners);CHKERRQ(ierr);
2302   ierr = PetscFree(dlens);CHKERRQ(ierr);
2303   ierr = PetscFree(mask);CHKERRQ(ierr);
2304   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2305   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2306   PetscFunctionReturn(0);
2307 }
2308 EXTERN_C_END
2309 
2310 #undef __FUNCT__
2311 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2312 /*@
2313    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2314 
2315    Input Parameters:
2316 .  mat  - the matrix
2317 .  fact - factor
2318 
2319    Collective on Mat
2320 
2321    Level: advanced
2322 
2323   Notes:
2324    This can also be set by the command line option: -mat_use_hash_table fact
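   For example (1.39 is only an illustration; it is also the fallback value used
   by MatCreate_MPISBAIJ when no factor is given):
.vb
   -mat_use_hash_table 1.39
.ve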
2325 
2326 .keywords: matrix, hashtable, factor, HT
2327 
2328 .seealso: MatSetOption()
2329 @*/
2330 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2331 {
2332   PetscFunctionBegin;
2333   SETERRQ(1,"Function not yet written for SBAIJ format");
2334   /* PetscFunctionReturn(0); */
2335 }
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2339 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2340 {
2341   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2342   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2343   PetscReal    atmp;
2344   PetscReal    *work,*svalues,*rvalues;
2345   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2346   int          rank,size,*rowners_bs,dest,count,source;
2347   PetscScalar  *va;
2348   MatScalar    *ba;
2349   MPI_Status   stat;
2350 
2351   PetscFunctionBegin;
2352   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2353   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2354 
2355   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
2356   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
2357 
2358   bs   = a->bs;
2359   mbs  = a->mbs;
2360   Mbs  = a->Mbs;
2361   ba   = b->a;
2362   bi   = b->i;
2363   bj   = b->j;
2364   /*
2365   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2366   PetscSynchronizedFlush(PETSC_COMM_WORLD);
2367   */
2368 
2369   /* find ownerships */
2370   rowners_bs = a->rowners_bs;
2371   /*
2372   if (!rank){
2373     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2374   }
2375   */
2376 
2377   /* each proc creates an array to be distributed */
2378   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2379   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2380 
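  /* Only the upper triangle is stored locally, so each entry of B also contributes
     (by symmetry) to a row owned by a higher-ranked process.  Its absolute value is
     accumulated in work[], indexed by global (point) column, and sent to that owner
     below. */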
2381   /* row_max for B */
2382   if (rank != size-1){
2383     for (i=0; i<mbs; i++) {
2384       ncols = bi[1] - bi[0]; bi++;
2385       brow  = bs*i;
2386       for (j=0; j<ncols; j++){
2387         bcol = bs*(*bj);
2388         for (kcol=0; kcol<bs; kcol++){
2389           col = bcol + kcol;                 /* local col index */
2390           col += rowners_bs[rank+1];      /* global col index */
2391           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2392           for (krow=0; krow<bs; krow++){
2393             atmp = PetscAbsScalar(*ba); ba++;
2394             row = brow + krow;    /* local row index */
2395             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2396             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2397             if (work[col] < atmp) work[col] = atmp;
2398           }
2399         }
2400         bj++;
2401       }
2402     }
2403     /*
2404       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2405       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2406       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2407       */
2408 
2409     /* send values to its owners */
2410     for (dest=rank+1; dest<size; dest++){
2411       svalues = work + rowners_bs[dest];
2412       count   = rowners_bs[dest+1]-rowners_bs[dest];
2413       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PETSC_COMM_WORLD);CHKERRQ(ierr);
2414       /*
2415       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2416       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2417       */
2418     }
2419   }
2420 
2421   /* receive values */
2422   if (rank){
2423     rvalues = work;
2424     count   = rowners_bs[rank+1]-rowners_bs[rank];
2425     for (source=0; source<rank; source++){
2426       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);CHKERRQ(ierr);
2427       /* process values */
2428       for (i=0; i<count; i++){
2429         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2430       }
2431       /*
2432       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2433       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2434       */
2435     }
2436   }
2437 
2438   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2439   ierr = PetscFree(work);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatRelax_MPISBAIJ"
2445 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2446 {
2447   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2448   int            ierr,mbs=mat->mbs,bs=mat->bs;
2449   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2450   Vec            bb1;
2451 
2452   PetscFunctionBegin;
2453   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2454   if (bs > 1)
2455     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2456 
2457   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2458     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2459       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2460       its--;
2461     }
2462 
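    /* each remaining iteration forms a modified right-hand side
       bb1 = bb - (off-process coupling through B and B^T applied to the current xx),
       communicated via the slvec0 -> slvec1 scatter, and then applies a symmetric
       sweep to the local diagonal block A */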
2463     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2464     while (its--){
2465 
2466       /* lower triangular part: slvec0b = - B^T*xx */
2467       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2468 
2469       /* copy xx into slvec0a */
2470       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2471       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2472       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2473       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2474 
2475       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2476 
2477       /* copy bb into slvec1a */
2478       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2479       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2480       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2481       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2482 
2483       /* set slvec1b = 0 */
2484       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2485 
2486       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2487       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2488       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2489       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2490 
2491       /* upper triangular part: bb1 = bb1 - B*x */
2492       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2493 
2494       /* local diagonal sweep */
2495       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2496     }
2497     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2498   } else {
2499     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2500   }
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2506 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2507 {
2508   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2509   int            ierr;
2510   PetscScalar    mone=-1.0;
2511   Vec            lvec1,bb1;
2512 
2513   PetscFunctionBegin;
2514   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2515   if (mat->bs > 1)
2516     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2517 
2518   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2519     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2520       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2521       its--;
2522     }
2523 
2524     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2525     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2526     while (its--){
2527       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2528 
2529       /* lower diagonal part: bb1 = bb - B^T*xx */
2530       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2531       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2532 
2533       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2534       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2535       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2536 
2537       /* upper diagonal part: bb1 = bb1 - B*x */
2538       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2539       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2540 
2541       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2542 
2543       /* diagonal sweep */
2544       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2545     }
2546     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2547     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2548   } else {
2549     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2550   }
2551   PetscFunctionReturn(0);
2552 }
2553 
2554