xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 0b498fdb2a73e86aac2706d824139bbc5f26bf18)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 extern int DisAssemble_MPISBAIJ(Mat);
11 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
12 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
13 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *);
14 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
15 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
18 extern int MatPrintHelp_SeqSBAIJ(Mat);
19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements already converted to single
27    precision and inserts them into the single precision data structure. The function
28    MatSetValuesBlocked_MPISBAIJ() converts the entries from PetscScalar into single precision
29    and then calls ..._MatScalar() to put them into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 int MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
51   int          ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 int MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
66   int          ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 /*
76      Local utility routine that creates a mapping from the global column
77    number to the local number in the off-diagonal part of the local
78    storage of the matrix.  This is done in a non-scalable way since the
79    length of colmap equals the number of block columns of the global matrix.
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private"
83 static int CreateColmap_MPISBAIJ_Private(Mat mat)
84 {
85   PetscFunctionBegin;
86   SETERRQ(1,"Function not yet written for SBAIJ format");
87   /* PetscFunctionReturn(0); */
88 }
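
/*
   An illustrative sketch (not the missing implementation) of what this routine
   could look like, modeled on the non-CTABLE branch of the MPIBAIJ version;
   the members B->nbs and baij->Nbs are assumptions taken from that code.
   colmap[c] would hold 1 + bs*(local block column of global block column c in B),
   and 0 when that block column is not present locally:

     Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
     Mat_SeqBAIJ  *B    = (Mat_SeqBAIJ*)baij->B->data;
     int          ierr,i,bs = baij->bs;

     ierr = PetscMalloc(baij->Nbs*sizeof(int),&baij->colmap);CHKERRQ(ierr);
     ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
     for (i=0; i<B->nbs; i++) baij->colmap[baij->garray[i]] = i*bs + 1;
*/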
89 
90 #define CHUNKSIZE  10
91 
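/*
   MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) inserts or adds one point
   value into the local diagonal block A: block row brow = row/bs is searched
   (a short binary search followed by a linear scan) for block column bcol = col/bs;
   if the block is missing and new nonzeros are allowed, the later entries of the
   row are shifted up and, when the row is full, the storage is reallocated with
   room for CHUNKSIZE additional blocks.  MatSetValues_SeqSBAIJ_B_Private() below
   does the same for the off-diagonal block B.
*/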
92 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
93 { \
94  \
95     brow = row/bs;  \
96     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
97     rmax = aimax[brow]; nrow = ailen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto a_noinsert; \
113         } \
114       } \
115       if (a->nonew == 1) goto a_noinsert; \
116       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
117       if (nrow >= rmax) { \
118         /* there is no extra room in row, therefore enlarge */ \
119         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
120         MatScalar *new_a; \
121  \
122         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
123  \
124         /* malloc new storage space */ \
125         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
126         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
127         new_j = (int*)(new_a + bs2*new_nz); \
128         new_i = new_j + new_nz; \
129  \
130         /* copy over old data into new slots */ \
131         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
132         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
133         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
134         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
135         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
136         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
137         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
138         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
139                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
140         /* free up old matrix storage */ \
141         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
142         if (!a->singlemalloc) { \
143           ierr = PetscFree(a->i);CHKERRQ(ierr); \
144           ierr = PetscFree(a->j);CHKERRQ(ierr);\
145         } \
146         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
147         a->singlemalloc = PETSC_TRUE; \
148  \
149         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
150         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
151         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
152         a->s_maxnz += bs2*CHUNKSIZE; \
153         a->reallocs++; \
154         a->s_nz++; \
155       } \
156       N = nrow++ - 1;  \
157       /* shift up all the later entries in this row */ \
158       for (ii=N; ii>=_i; ii--) { \
159         rp[ii+1] = rp[ii]; \
160         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
161       } \
162       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
163       rp[_i]                      = bcol;  \
164       ap[bs2*_i + bs*cidx + ridx] = value;  \
165       a_noinsert:; \
166     ailen[brow] = nrow; \
167 }
168 #ifndef MatSetValues_SeqSBAIJ_B_Private
169 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
170 { \
171     brow = row/bs;  \
172     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
173     rmax = bimax[brow]; nrow = bilen[brow]; \
174       bcol = col/bs; \
175       ridx = row % bs; cidx = col % bs; \
176       low = 0; high = nrow; \
177       while (high-low > 3) { \
178         t = (low+high)/2; \
179         if (rp[t] > bcol) high = t; \
180         else              low  = t; \
181       } \
182       for (_i=low; _i<high; _i++) { \
183         if (rp[_i] > bcol) break; \
184         if (rp[_i] == bcol) { \
185           bap  = ap +  bs2*_i + bs*cidx + ridx; \
186           if (addv == ADD_VALUES) *bap += value;  \
187           else                    *bap  = value;  \
188           goto b_noinsert; \
189         } \
190       } \
191       if (b->nonew == 1) goto b_noinsert; \
192       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
193       if (nrow >= rmax) { \
194         /* there is no extra room in row, therefore enlarge */ \
195         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
196         MatScalar *new_a; \
197  \
198         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
199  \
200         /* malloc new storage space */ \
201         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
202         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
203         new_j = (int*)(new_a + bs2*new_nz); \
204         new_i = new_j + new_nz; \
205  \
206         /* copy over old data into new slots */ \
207         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
208         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
209         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
210         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
211         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
212         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
213         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
214         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
215                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
216         /* free up old matrix storage */ \
217         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
218         if (!b->singlemalloc) { \
219           ierr = PetscFree(b->i);CHKERRQ(ierr); \
220           ierr = PetscFree(b->j);CHKERRQ(ierr); \
221         } \
222         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
223         b->singlemalloc = PETSC_TRUE; \
224  \
225         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
226         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
227         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
228         b->maxnz += bs2*CHUNKSIZE; \
229         b->reallocs++; \
230         b->nz++; \
231       } \
232       N = nrow++ - 1;  \
233       /* shift up all the later entries in this row */ \
234       for (ii=N; ii>=_i; ii--) { \
235         rp[ii+1] = rp[ii]; \
236         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
237       } \
238       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
239       rp[_i]                      = bcol;  \
240       ap[bs2*_i + bs*cidx + ridx] = value;  \
241       b_noinsert:; \
242     bilen[brow] = nrow; \
243 }
244 #endif
245 
246 #if defined(PETSC_USE_MAT_SINGLE)
247 #undef __FUNCT__
248 #define __FUNCT__ "MatSetValues_MPISBAIJ"
249 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
250 {
251   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
252   int          ierr,i,N = m*n;
253   MatScalar    *vsingle;
254 
255   PetscFunctionBegin;
256   if (N > b->setvalueslen) {
257     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
258     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
259     b->setvalueslen  = N;
260   }
261   vsingle = b->setvaluescopy;
262 
263   for (i=0; i<N; i++) {
264     vsingle[i] = v[i];
265   }
266   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
272 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
273 {
274   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
275   int          ierr,i,N = m*n*b->bs2;
276   MatScalar    *vsingle;
277 
278   PetscFunctionBegin;
279   if (N > b->setvalueslen) {
280     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
281     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
282     b->setvalueslen  = N;
283   }
284   vsingle = b->setvaluescopy;
285   for (i=0; i<N; i++) {
286     vsingle[i] = v[i];
287   }
288   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
294 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
295 {
296   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
297   int         ierr,i,N = m*n;
298   MatScalar   *vsingle;
299 
300   PetscFunctionBegin;
301   SETERRQ(1,"Function not yet written for SBAIJ format");
302   /* PetscFunctionReturn(0); */
303 }
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
307 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
308 {
309   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
310   int         ierr,i,N = m*n*b->bs2;
311   MatScalar   *vsingle;
312 
313   PetscFunctionBegin;
314   SETERRQ(1,"Function not yet written for SBAIJ format");
315   /* PetscFunctionReturn(0); */
316 }
317 #endif
318 
319 /* Only add/insert a(i,j) with i<=j (block indices).
320    Any a(i,j) with i>j input by the user is ignored.
321 */
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
324 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
325 {
326   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
327   MatScalar    value;
328   PetscTruth   roworiented = baij->roworiented;
329   int          ierr,i,j,row,col;
330   int          rstart_orig=baij->rstart_bs;
331   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
332   int          cend_orig=baij->cend_bs,bs=baij->bs;
333 
334   /* Some Variables required in the macro */
335   Mat          A = baij->A;
336   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
337   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
338   MatScalar    *aa=a->a;
339 
340   Mat          B = baij->B;
341   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
342   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
343   MatScalar    *ba=b->a;
344 
345   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
346   int          low,high,t,ridx,cidx,bs2=a->bs2;
347   MatScalar    *ap,*bap;
348 
349   /* for stash */
350   int          n_loc, *in_loc=0;
351   MatScalar    *v_loc=0;
352 
353   PetscFunctionBegin;
354 
355   if(!baij->donotstash){
356     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
357     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
358   }
359 
360   for (i=0; i<m; i++) {
361     if (im[i] < 0) continue;
362 #if defined(PETSC_USE_BOPT_g)
363     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
364 #endif
365     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
366       row = im[i] - rstart_orig;              /* local row index */
367       for (j=0; j<n; j++) {
368         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
369         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
370           col = in[j] - cstart_orig;          /* local col index */
371           brow = row/bs; bcol = col/bs;
372           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
373           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
374           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
375           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
376         } else if (in[j] < 0) continue;
377 #if defined(PETSC_USE_BOPT_g)
378         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");}
379 #endif
380         else {  /* off-diag entry (B) */
381           if (mat->was_assembled) {
382             if (!baij->colmap) {
383               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
384             }
385 #if defined (PETSC_USE_CTABLE)
386             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
387             col  = col - 1;
388 #else
389             col = baij->colmap[in[j]/bs] - 1;
390 #endif
391             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
392               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
393               col =  in[j];
394               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
395               B = baij->B;
396               b = (Mat_SeqBAIJ*)(B)->data;
397               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
398               ba=b->a;
399             } else col += in[j]%bs;
400           } else col = in[j];
401           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
402           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
403           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
404         }
405       }
406     } else {  /* off processor entry */
407       if (!baij->donotstash) {
408         n_loc = 0;
409         for (j=0; j<n; j++){
410           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
411           in_loc[n_loc] = in[j];
412           if (roworiented) {
413             v_loc[n_loc] = v[i*n+j];
414           } else {
415             v_loc[n_loc] = v[j*m+i];
416           }
417           n_loc++;
418         }
419         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
420       }
421     }
422   }
423 
424   if(!baij->donotstash){
425     ierr = PetscFree(in_loc);CHKERRQ(ierr);
426     ierr = PetscFree(v_loc);CHKERRQ(ierr);
427   }
428   PetscFunctionReturn(0);
429 }
430 
431 #undef __FUNCT__
432 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
433 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
434 {
435   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
436   MatScalar    *value,*barray=baij->barray;
437   PetscTruth   roworiented = baij->roworiented;
438   int          ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
439   int          rend=baij->rend,cstart=baij->cstart,stepval;
440   int          cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
441 
442   PetscFunctionBegin;
443   if(!barray) {
444     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
445     baij->barray = barray;
446   }
447 
448   if (roworiented) {
449     stepval = (n-1)*bs;
450   } else {
451     stepval = (m-1)*bs;
452   }
453   for (i=0; i<m; i++) {
454     if (im[i] < 0) continue;
455 #if defined(PETSC_USE_BOPT_g)
456     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
457 #endif
458     if (im[i] >= rstart && im[i] < rend) {
459       row = im[i] - rstart;
460       for (j=0; j<n; j++) {
461         /* if n == 1 (row oriented) or m == 1 (column oriented) a copy is not required */
462         if ((roworiented) && (n == 1)) {
463           barray = v + i*bs2;
464         } else if((!roworiented) && (m == 1)) {
465           barray = v + j*bs2;
466         } else { /* Here a copy is required */
467           if (roworiented) {
468             value = v + i*(stepval+bs)*bs + j*bs;
469           } else {
470             value = v + j*(stepval+bs)*bs + i*bs;
471           }
472           for (ii=0; ii<bs; ii++,value+=stepval) {
473             for (jj=0; jj<bs; jj++) {
474               *barray++  = *value++;
475             }
476           }
477           barray -=bs2;
478         }
479 
480         if (in[j] >= cstart && in[j] < cend){
481           col  = in[j] - cstart;
482           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
483         }
484         else if (in[j] < 0) continue;
485 #if defined(PETSC_USE_BOPT_g)
486         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
487 #endif
488         else {
489           if (mat->was_assembled) {
490             if (!baij->colmap) {
491               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
492             }
493 
494 #if defined(PETSC_USE_BOPT_g)
495 #if defined (PETSC_USE_CTABLE)
496             { int data;
497               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
498               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
499             }
500 #else
501             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
502 #endif
503 #endif
504 #if defined (PETSC_USE_CTABLE)
505 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
506             col  = (col - 1)/bs;
507 #else
508             col = (baij->colmap[in[j]] - 1)/bs;
509 #endif
510             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
511               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
512               col =  in[j];
513             }
514           }
515           else col = in[j];
516           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
517         }
518       }
519     } else {
520       if (!baij->donotstash) {
521         if (roworiented) {
522           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
523         } else {
524           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
525         }
526       }
527     }
528   }
529   PetscFunctionReturn(0);
530 }
531 
532 #define HASH_KEY 0.6180339887
533 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
534 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
535 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
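/*
   HASH() is Knuth's multiplicative hashing: the key is multiplied by the fractional
   part of the golden ratio (0.6180339887...), and the fractional part of that product,
   scaled by the table size, gives an index in [0,size).  For example,
   HASH(100,7,tmp) = (int)(100*frac(7*0.6180339887)) = 32.
*/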
536 #undef __FUNCT__
537 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
538 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
539 {
540   PetscFunctionBegin;
541   SETERRQ(1,"Function not yet written for SBAIJ format");
542   /* PetscFunctionReturn(0); */
543 }
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
547 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
548 {
549   PetscFunctionBegin;
550   SETERRQ(1,"Function not yet written for SBAIJ format");
551   /* PetscFunctionReturn(0); */
552 }
553 
554 #undef __FUNCT__
555 #define __FUNCT__ "MatGetValues_MPISBAIJ"
556 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
557 {
558   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
559   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
560   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
561 
562   PetscFunctionBegin;
563   for (i=0; i<m; i++) {
564     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
565     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
566     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
567       row = idxm[i] - bsrstart;
568       for (j=0; j<n; j++) {
569         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
570         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
571         if (idxn[j] >= bscstart && idxn[j] < bscend){
572           col = idxn[j] - bscstart;
573           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
574         } else {
575           if (!baij->colmap) {
576             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
577           }
578 #if defined (PETSC_USE_CTABLE)
579           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
580           data --;
581 #else
582           data = baij->colmap[idxn[j]/bs]-1;
583 #endif
584           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
585           else {
586             col  = data + idxn[j]%bs;
587             ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
588           }
589         }
590       }
591     } else {
592       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
593     }
594   }
595   PetscFunctionReturn(0);
596 }
597 
598 #undef __FUNCT__
599 #define __FUNCT__ "MatNorm_MPISBAIJ"
600 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
601 {
602   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
603   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
604   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
605   int        ierr;
606   PetscReal  sum[2],*lnorm2;
607 
608   PetscFunctionBegin;
609   if (baij->size == 1) {
610     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
611   } else {
612     if (type == NORM_FROBENIUS) {
613       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
614       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
615       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
616       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
617       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
618       /*
619       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
620       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
621       */
622       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
623       /*
624       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
625       PetscSynchronizedFlush(PETSC_COMM_WORLD); */
626 
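      /* Each block stored in B represents two entries of the full symmetric matrix
         (a_ij and its transpose a_ji), so its squared norm is counted twice; the
         diagonal block A is a SeqSBAIJ matrix whose MatNorm() is assumed to account
         for its own symmetric storage. */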
627       *norm = sqrt(sum[0] + 2*sum[1]);
628       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
629     } else {
630       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
631     }
632   }
633   PetscFunctionReturn(0);
634 }
635 
636 /*
637   Creates the hash table and fills it in.
638   This table is created only once.
639   If new entries need to be added to the matrix
640   then the hash table has to be destroyed and
641   recreated.
642 */
643 #undef __FUNCT__
644 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
645 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
646 {
647   PetscFunctionBegin;
648   SETERRQ(1,"Function not yet written for SBAIJ format");
649   /* PetscFunctionReturn(0); */
650 }
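
/*
   In the MPIBAIJ code on which this routine would be modeled, the hash table maps a
   global (block row, block column) key, hashed with the HASH() macro defined further
   below, to the location of that block in the local storage, so that the _HT variants
   of MatSetValues() can insert values without searching the row; presumably the same
   scheme is intended here once this routine is written.
*/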
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
654 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
655 {
656   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
657   int         ierr,nstash,reallocs;
658   InsertMode  addv;
659 
660   PetscFunctionBegin;
661   if (baij->donotstash) {
662     PetscFunctionReturn(0);
663   }
664 
665   /* make sure all processors are either in INSERTMODE or ADDMODE */
666   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
667   if (addv == (ADD_VALUES|INSERT_VALUES)) {
668     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
669   }
670   mat->insertmode = addv; /* in case this processor had no cache */
671 
672   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
673   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
674   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
675   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
676   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
677   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
678   PetscFunctionReturn(0);
679 }
680 
681 #undef __FUNCT__
682 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
683 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
684 {
685   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
686   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
687   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
688   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
689   int         *row,*col,other_disassembled;
690   PetscTruth  r1,r2,r3;
691   MatScalar   *val;
692   InsertMode  addv = mat->insertmode;
693 #if defined(PETSC_HAVE_SPOOLES)
694   PetscTruth  flag;
695 #endif
696 
697   PetscFunctionBegin;
698 
699   if (!baij->donotstash) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       /*
703       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
704       PetscSynchronizedFlush(PETSC_COMM_WORLD);
705       */
706       if (!flg) break;
707 
708       for (i=0; i<n;) {
709         /* Now identify the consecutive vals belonging to the same row */
710         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
711         if (j < n) ncols = j-i;
712         else       ncols = n-i;
713         /* Now assemble all these values with a single function call */
714         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
715         i = j;
716       }
717     }
718     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
719     /* Now process the block-stash. Since the values are stashed column-oriented,
720        set the roworiented flag to column oriented, and after MatSetValues()
721        restore the original flags */
722     r1 = baij->roworiented;
723     r2 = a->roworiented;
724     r3 = b->roworiented;
725     baij->roworiented = PETSC_FALSE;
726     a->roworiented    = PETSC_FALSE;
727     b->roworiented    = PETSC_FALSE;
728     while (1) {
729       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
730       if (!flg) break;
731 
732       for (i=0; i<n;) {
733         /* Now identify the consecutive vals belonging to the same row */
734         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
735         if (j < n) ncols = j-i;
736         else       ncols = n-i;
737         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
738         i = j;
739       }
740     }
741     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
742     baij->roworiented = r1;
743     a->roworiented    = r2;
744     b->roworiented    = r3;
745   }
746 
747   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
748   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
749 
750   /* determine if any processor has disassembled, if so we must
751      also disassemble ourselves, in order that we may reassemble. */
752   /*
753      if nonzero structure of submatrix B cannot change then we know that
754      no processor disassembled thus we can skip this stuff
755   */
756   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
757     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
758     if (mat->was_assembled && !other_disassembled) {
759       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
760     }
761   }
762 
763   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
764     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
765   }
766   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
767   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
768 
769 #if defined(PETSC_USE_BOPT_g)
770   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
771     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
772     baij->ht_total_ct  = 0;
773     baij->ht_insert_ct = 0;
774   }
775 #endif
776   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
777     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
778     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
779     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
780   }
781 
782   if (baij->rowvalues) {
783     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
784     baij->rowvalues = 0;
785   }
786 
787 #if defined(PETSC_HAVE_SPOOLES)
788   ierr = PetscOptionsHasName(mat->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
789   if (flag) { ierr = MatUseSpooles_MPISBAIJ(mat);CHKERRQ(ierr); }
790 #endif
791   PetscFunctionReturn(0);
792 }
793 
794 #undef __FUNCT__
795 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
796 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
797 {
798   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
799   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
800   PetscTruth        isascii,isdraw;
801   PetscViewer       sviewer;
802   PetscViewerFormat format;
803 
804   PetscFunctionBegin;
805   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
806   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
807   if (isascii) {
808     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
809     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
810       MatInfo info;
811       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
812       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
813       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
814               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
815               baij->bs,(int)info.memory);CHKERRQ(ierr);
816       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
817       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
818       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
819       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
820       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
821       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
822       PetscFunctionReturn(0);
823     } else if (format == PETSC_VIEWER_ASCII_INFO) {
824       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
825       PetscFunctionReturn(0);
826     }
827   }
828 
829   if (isdraw) {
830     PetscDraw       draw;
831     PetscTruth isnull;
832     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
833     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
834   }
835 
836   if (size == 1) {
837     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
838     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
839   } else {
840     /* assemble the entire matrix onto first processor. */
841     Mat         A;
842     Mat_SeqSBAIJ *Aloc;
843     Mat_SeqBAIJ *Bloc;
844     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
845     MatScalar   *a;
846 
847     if (!rank) {
848       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
849     } else {
850       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
851     }
852     PetscLogObjectParent(mat,A);
853 
854     /* copy over the A part */
855     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
856     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
857     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
858 
859     for (i=0; i<mbs; i++) {
860       rvals[0] = bs*(baij->rstart + i);
861       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
862       for (j=ai[i]; j<ai[i+1]; j++) {
863         col = (baij->cstart+aj[j])*bs;
864         for (k=0; k<bs; k++) {
865           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
866           col++; a += bs;
867         }
868       }
869     }
870     /* copy over the B part */
871     Bloc = (Mat_SeqBAIJ*)baij->B->data;
872     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
873     for (i=0; i<mbs; i++) {
874       rvals[0] = bs*(baij->rstart + i);
875       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
876       for (j=ai[i]; j<ai[i+1]; j++) {
877         col = baij->garray[aj[j]]*bs;
878         for (k=0; k<bs; k++) {
879           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
880           col++; a += bs;
881         }
882       }
883     }
884     ierr = PetscFree(rvals);CHKERRQ(ierr);
885     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
886     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     /*
888        Everyone has to call to draw the matrix since the graphics waits are
889        synchronized across all processors that share the PetscDraw object
890     */
891     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
892     if (!rank) {
893       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
894       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
895     }
896     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
897     ierr = MatDestroy(A);CHKERRQ(ierr);
898   }
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatView_MPISBAIJ"
904 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
905 {
906   int        ierr;
907   PetscTruth isascii,isdraw,issocket,isbinary;
908 
909   PetscFunctionBegin;
910   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
911   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
912   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
913   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
914   if (isascii || isdraw || issocket || isbinary) {
915     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
916   } else {
917     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
918   }
919   PetscFunctionReturn(0);
920 }
921 
922 #undef __FUNCT__
923 #define __FUNCT__ "MatDestroy_MPISBAIJ"
924 int MatDestroy_MPISBAIJ(Mat mat)
925 {
926   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
927   int         ierr;
928 
929   PetscFunctionBegin;
930 #if defined(PETSC_USE_LOG)
931   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
932 #endif
933   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
934   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
935   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
936   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
937   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
938 #if defined (PETSC_USE_CTABLE)
939   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
940 #else
941   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
942 #endif
943   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
944   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
945   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
946   if (baij->slvec0) {
947     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
948     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
949   }
950   if (baij->slvec1) {
951     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
952     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
953     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
954   }
955   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
956   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
957   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
958   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
959 #if defined(PETSC_USE_MAT_SINGLE)
960   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
961 #endif
962   ierr = PetscFree(baij);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
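/*
   y = A*x for the symmetric format: only the upper triangular blocks are stored,
   split into the diagonal block A and the off-diagonal block B, so the lower
   triangular (subdiagonal) contribution must be formed from B^T.  The work vectors
   are laid out as
       slvec0 = [ x ; B^T x ]   (outgoing)
       slvec1 = [ A x ; 0 ]     (incoming)
   and a single ADD_VALUES scatter over sMvctx both accumulates the B^T x pieces into
   the rows that own them (slvec1a) and gathers the ghost values of x needed for the
   superdiagonal product (slvec1b); finally y = B*slvec1b + slvec1a.
*/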
966 #undef __FUNCT__
967 #define __FUNCT__ "MatMult_MPISBAIJ"
968 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
969 {
970   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
971   int         ierr,nt,mbs=a->mbs,bs=a->bs;
972   PetscScalar *x,*from,zero=0.0;
973 
974   PetscFunctionBegin;
975   /*
976   PetscSynchronizedPrintf(PETSC_COMM_WORLD," _1comm is called ...\n");
977   PetscSynchronizedFlush(PETSC_COMM_WORLD);
978   */
979   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
980   if (nt != A->n) {
981     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
982   }
983   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
984   if (nt != A->m) {
985     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
986   }
987 
988   /* diagonal part */
989   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
990   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
991 
992   /* subdiagonal part */
993   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
994 
995   /* copy x into the vec slvec0 */
996   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
997   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
998   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
999   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1000 
1001   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1002   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1003   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1004 
1005   /* superdiagonal part */
1006   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1007 
1008   PetscFunctionReturn(0);
1009 }
1010 
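/*
   Two-communication variant of the multiply: the standard Mvctx/lvec machinery is
   used twice, a forward scatter to gather ghost values of x for the superdiagonal
   product B*lvec, and a reverse ADD_VALUES scatter to return the subdiagonal
   contribution B^T*x to the processes that own those rows.
*/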
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1013 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1014 {
1015   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1016   int         ierr,nt;
1017 
1018   PetscFunctionBegin;
1019   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1020   if (nt != A->n) {
1021     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1022   }
1023   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1024   if (nt != A->m) {
1025     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1026   }
1027 
1028   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1029   /* do diagonal part */
1030   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1031   /* do superdiagonal part */
1032   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1033   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1034   /* do subdiagonal part */
1035   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1036   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1037   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1038 
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1044 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1045 {
1046   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1047   int          ierr,mbs=a->mbs,bs=a->bs;
1048   PetscScalar  *x,*from,zero=0.0;
1049 
1050   PetscFunctionBegin;
1051   /*
1052   PetscSynchronizedPrintf(PETSC_COMM_WORLD," MatMultAdd is called ...\n");
1053   PetscSynchronizedFlush(PETSC_COMM_WORLD);
1054   */
1055   /* diagonal part */
1056   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1057   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1058 
1059   /* subdiagonal part */
1060   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1061 
1062   /* copy x into the vec slvec0 */
1063   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1064   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1065   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1066   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1067 
1068   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1069   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1070   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1071 
1072   /* superdiagonal part */
1073   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1074 
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1080 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1081 {
1082   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1083   int        ierr;
1084 
1085   PetscFunctionBegin;
1086   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1087   /* do diagonal part */
1088   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1089   /* do superdiagonal part */
1090   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1091   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1092 
1093   /* do subdiagonal part */
1094   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1095   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1096   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1097 
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1103 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1104 {
1105   PetscFunctionBegin;
1106   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
1107   /* PetscFunctionReturn(0); */
1108 }
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1112 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1113 {
1114   PetscFunctionBegin;
1115   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
1116   /* PetscFunctionReturn(0); */
1117 }
1118 
1119 /*
1120   This only works correctly for square matrices where the subblock A->A is the
1121    diagonal block
1122 */
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1125 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1126 {
1127   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1128   int         ierr;
1129 
1130   PetscFunctionBegin;
1131   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1132   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatScale_MPISBAIJ"
1138 int MatScale_MPISBAIJ(PetscScalar *aa,Mat A)
1139 {
1140   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1141   int         ierr;
1142 
1143   PetscFunctionBegin;
1144   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1145   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1151 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1152 {
1153   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1154   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1155   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1156   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1157   int            *cmap,*idx_p,cstart = mat->cstart;
1158 
1159   PetscFunctionBegin;
1160   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1161   mat->getrowactive = PETSC_TRUE;
1162 
1163   if (!mat->rowvalues && (idx || v)) {
1164     /*
1165         allocate enough space to hold information from the longest row.
1166     */
1167     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1168     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1169     int     max = 1,mbs = mat->mbs,tmp;
1170     for (i=0; i<mbs; i++) {
1171       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1172       if (max < tmp) { max = tmp; }
1173     }
1174     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1175     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1176   }
1177 
1178   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1179   lrow = row - brstart;  /* local row index */
1180 
1181   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1182   if (!v)   {pvA = 0; pvB = 0;}
1183   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1184   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1185   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1186   nztot = nzA + nzB;
1187 
1188   cmap  = mat->garray;
1189   if (v  || idx) {
1190     if (nztot) {
1191       /* Sort by increasing column numbers, assuming A and B already sorted */
1192       int imark = -1;
1193       if (v) {
1194         *v = v_p = mat->rowvalues;
1195         for (i=0; i<nzB; i++) {
1196           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1197           else break;
1198         }
1199         imark = i;
1200         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1201         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1202       }
1203       if (idx) {
1204         *idx = idx_p = mat->rowindices;
1205         if (imark > -1) {
1206           for (i=0; i<imark; i++) {
1207             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1208           }
1209         } else {
1210           for (i=0; i<nzB; i++) {
1211             if (cmap[cworkB[i]/bs] < cstart)
1212               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1213             else break;
1214           }
1215           imark = i;
1216         }
1217         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1218         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1219       }
1220     } else {
1221       if (idx) *idx = 0;
1222       if (v)   *v   = 0;
1223     }
1224   }
1225   *nz = nztot;
1226   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1227   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1233 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1234 {
1235   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1236 
1237   PetscFunctionBegin;
1238   if (baij->getrowactive == PETSC_FALSE) {
1239     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1240   }
1241   baij->getrowactive = PETSC_FALSE;
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1247 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1248 {
1249   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1250 
1251   PetscFunctionBegin;
1252   *bs = baij->bs;
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1258 int MatZeroEntries_MPISBAIJ(Mat A)
1259 {
1260   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1261   int         ierr;
1262 
1263   PetscFunctionBegin;
1264   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1265   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1271 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1272 {
1273   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1274   Mat         A = a->A,B = a->B;
1275   int         ierr;
1276   PetscReal   isend[5],irecv[5];
1277 
1278   PetscFunctionBegin;
1279   info->block_size     = (PetscReal)a->bs;
1280   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1281   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1282   isend[3] = info->memory;  isend[4] = info->mallocs;
1283   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1284   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1285   isend[3] += info->memory;  isend[4] += info->mallocs;
1286   if (flag == MAT_LOCAL) {
1287     info->nz_used      = isend[0];
1288     info->nz_allocated = isend[1];
1289     info->nz_unneeded  = isend[2];
1290     info->memory       = isend[3];
1291     info->mallocs      = isend[4];
1292   } else if (flag == MAT_GLOBAL_MAX) {
1293     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1294     info->nz_used      = irecv[0];
1295     info->nz_allocated = irecv[1];
1296     info->nz_unneeded  = irecv[2];
1297     info->memory       = irecv[3];
1298     info->mallocs      = irecv[4];
1299   } else if (flag == MAT_GLOBAL_SUM) {
1300     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1301     info->nz_used      = irecv[0];
1302     info->nz_allocated = irecv[1];
1303     info->nz_unneeded  = irecv[2];
1304     info->memory       = irecv[3];
1305     info->mallocs      = irecv[4];
1306   } else {
1307     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1308   }
1309   info->rows_global       = (PetscReal)A->M;
1310   info->columns_global    = (PetscReal)A->N;
1311   info->rows_local        = (PetscReal)A->m;
1312   info->columns_local     = (PetscReal)A->N;
1313   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1314   info->fill_ratio_needed = 0;
1315   info->factor_mallocs    = 0;
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1321 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1322 {
1323   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1324   int         ierr;
1325 
1326   PetscFunctionBegin;
1327   switch (op) {
1328   case MAT_NO_NEW_NONZERO_LOCATIONS:
1329   case MAT_YES_NEW_NONZERO_LOCATIONS:
1330   case MAT_COLUMNS_UNSORTED:
1331   case MAT_COLUMNS_SORTED:
1332   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1333   case MAT_KEEP_ZEROED_ROWS:
1334   case MAT_NEW_NONZERO_LOCATION_ERR:
1335     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1336     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1337     break;
1338   case MAT_ROW_ORIENTED:
1339     a->roworiented = PETSC_TRUE;
1340     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1341     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1342     break;
1343   case MAT_ROWS_SORTED:
1344   case MAT_ROWS_UNSORTED:
1345   case MAT_YES_NEW_DIAGONALS:
1346   case MAT_USE_SINGLE_PRECISION_SOLVES:
1347     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1348     break;
1349   case MAT_COLUMN_ORIENTED:
1350     a->roworiented = PETSC_FALSE;
1351     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1352     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1353     break;
1354   case MAT_IGNORE_OFF_PROC_ENTRIES:
1355     a->donotstash = PETSC_TRUE;
1356     break;
1357   case MAT_NO_NEW_DIAGONALS:
1358     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1359   case MAT_USE_HASH_TABLE:
1360     a->ht_flag = PETSC_TRUE;
1361     break;
1362   default:
1363     SETERRQ(PETSC_ERR_SUP,"unknown option");
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1370 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1371 {
1372   int ierr;
1373   PetscFunctionBegin;
1374   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 #undef __FUNCT__
1379 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1380 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1381 {
1382   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1383   Mat         a = baij->A,b = baij->B;
1384   int         ierr,s1,s2,s3;
1385 
1386   PetscFunctionBegin;
1387   if (ll != rr) {
1388     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1389   }
1390   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1391   if (rr) {
1392     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1393     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1394     /* Overlap communication with computation. */
1395     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1396     /*} if (ll) { */
1397     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1398     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1399     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1400     /* } */
1401     /* scale the diagonal block */
1402     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1403 
1404   /* if (rr) { */
1405     /* Do a scatter end and then right scale the off-diagonal block */
1406     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1407     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1408   }
1409 
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1415 int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag)
1416 {
1417   PetscFunctionBegin;
1418   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1423 int MatPrintHelp_MPISBAIJ(Mat A)
1424 {
1425   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1426   MPI_Comm    comm = A->comm;
1427   static int  called = 0;
1428   int         ierr;
1429 
1430   PetscFunctionBegin;
1431   if (!a->rank) {
1432     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1433   }
1434   if (called) {PetscFunctionReturn(0);} else called = 1;
1435   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1436   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1442 int MatSetUnfactored_MPISBAIJ(Mat A)
1443 {
1444   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1445   int         ierr;
1446 
1447   PetscFunctionBegin;
1448   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1453 
1454 #undef __FUNCT__
1455 #define __FUNCT__ "MatEqual_MPISBAIJ"
1456 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1457 {
1458   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1459   Mat         a,b,c,d;
1460   PetscTruth  flg;
1461   int         ierr;
1462 
1463   PetscFunctionBegin;
1464   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr);
1465   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1466   a = matA->A; b = matA->B;
1467   c = matB->A; d = matB->B;
1468 
1469   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1470   if (flg == PETSC_TRUE) {
1471     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1472   }
1473   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 #undef __FUNCT__
1478 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1479 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1480 {
1481   int        ierr;
1482 
1483   PetscFunctionBegin;
1484   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1485   PetscFunctionReturn(0);
1486 }
1487 /* -------------------------------------------------------------------*/
1488 static struct _MatOps MatOps_Values = {
1489   MatSetValues_MPISBAIJ,
1490   MatGetRow_MPISBAIJ,
1491   MatRestoreRow_MPISBAIJ,
1492   MatMult_MPISBAIJ,
1493   MatMultAdd_MPISBAIJ,
1494   MatMultTranspose_MPISBAIJ,
1495   MatMultTransposeAdd_MPISBAIJ,
1496   0,
1497   0,
1498   0,
1499   0,
1500   0,
1501   0,
1502   MatRelax_MPISBAIJ,
1503   MatTranspose_MPISBAIJ,
1504   MatGetInfo_MPISBAIJ,
1505   MatEqual_MPISBAIJ,
1506   MatGetDiagonal_MPISBAIJ,
1507   MatDiagonalScale_MPISBAIJ,
1508   MatNorm_MPISBAIJ,
1509   MatAssemblyBegin_MPISBAIJ,
1510   MatAssemblyEnd_MPISBAIJ,
1511   0,
1512   MatSetOption_MPISBAIJ,
1513   MatZeroEntries_MPISBAIJ,
1514   MatZeroRows_MPISBAIJ,
1515   0,
1516   0,
1517   0,
1518   0,
1519   MatSetUpPreallocation_MPISBAIJ,
1520   0,
1521   0,
1522   0,
1523   0,
1524   MatDuplicate_MPISBAIJ,
1525   0,
1526   0,
1527   0,
1528   0,
1529   0,
1530   MatGetSubMatrices_MPISBAIJ,
1531   MatIncreaseOverlap_MPISBAIJ,
1532   MatGetValues_MPISBAIJ,
1533   0,
1534   MatPrintHelp_MPISBAIJ,
1535   MatScale_MPISBAIJ,
1536   0,
1537   0,
1538   0,
1539   MatGetBlockSize_MPISBAIJ,
1540   0,
1541   0,
1542   0,
1543   0,
1544   0,
1545   0,
1546   MatSetUnfactored_MPISBAIJ,
1547   0,
1548   MatSetValuesBlocked_MPISBAIJ,
1549   0,
1550   0,
1551   0,
1552   MatGetPetscMaps_Petsc,
1553   0,
1554   0,
1555   0,
1556   0,
1557   0,
1558   0,
1559   MatGetRowMax_MPISBAIJ};
1560 
1561 
1562 EXTERN_C_BEGIN
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1565 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1566 {
1567   PetscFunctionBegin;
1568   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1569   *iscopy = PETSC_FALSE;
1570   PetscFunctionReturn(0);
1571 }
1572 EXTERN_C_END
1573 
1574 EXTERN_C_BEGIN
1575 #undef __FUNCT__
1576 #define __FUNCT__ "MatCreate_MPISBAIJ"
1577 int MatCreate_MPISBAIJ(Mat B)
1578 {
1579   Mat_MPISBAIJ *b;
1580   int          ierr;
1581   PetscTruth   flg;
1582 
1583   PetscFunctionBegin;
1584 
1585   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1586   B->data = (void*)b;
1587   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1588   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1589 
1590   B->ops->destroy    = MatDestroy_MPISBAIJ;
1591   B->ops->view       = MatView_MPISBAIJ;
1592   B->mapping    = 0;
1593   B->factor     = 0;
1594   B->assembled  = PETSC_FALSE;
1595 
1596   B->insertmode = NOT_SET_VALUES;
1597   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1598   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1599 
1600   /* build local table of row and column ownerships */
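  /* a single allocation holds three integer segments of length size+2: rowners
     (block-row ownership ranges), cowners (block-column ownership) and rowners_bs
     (the same ownership ranges expressed in scalar rows) */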
1601   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1602   b->cowners    = b->rowners + b->size + 2;
1603   b->rowners_bs = b->cowners + b->size + 2;
1604   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1605 
1606   /* build cache for off array entries formed */
1607   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1608   b->donotstash  = PETSC_FALSE;
1609   b->colmap      = PETSC_NULL;
1610   b->garray      = PETSC_NULL;
1611   b->roworiented = PETSC_TRUE;
1612 
1613 #if defined(PETSC_USE_MAT_SINGLE)
1614   /* stuff for MatSetValues_XXX in single precision */
1615   b->setvalueslen     = 0;
1616   b->setvaluescopy    = PETSC_NULL;
1617 #endif
1618 
1619   /* stuff used in block assembly */
1620   b->barray       = 0;
1621 
1622   /* stuff used for matrix vector multiply */
1623   b->lvec         = 0;
1624   b->Mvctx        = 0;
1625   b->slvec0       = 0;
1626   b->slvec0b      = 0;
1627   b->slvec1       = 0;
1628   b->slvec1a      = 0;
1629   b->slvec1b      = 0;
1630   b->sMvctx       = 0;
1631 
1632   /* stuff for MatGetRow() */
1633   b->rowindices   = 0;
1634   b->rowvalues    = 0;
1635   b->getrowactive = PETSC_FALSE;
1636 
1637   /* hash table stuff */
1638   b->ht           = 0;
1639   b->hd           = 0;
1640   b->ht_size      = 0;
1641   b->ht_flag      = PETSC_FALSE;
1642   b->ht_fact      = 0;
1643   b->ht_total_ct  = 0;
1644   b->ht_insert_ct = 0;
1645 
1646   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1647   if (flg) {
1648     PetscReal fact = 1.39;
1649     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1650     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1651     if (fact <= 1.0) fact = 1.39;
1652     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1653     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1654   }
1655   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1656                                      "MatStoreValues_MPISBAIJ",
1657                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1658   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1659                                      "MatRetrieveValues_MPISBAIJ",
1660                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1661   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1662                                      "MatGetDiagonalBlock_MPISBAIJ",
1663                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1664   PetscFunctionReturn(0);
1665 }
1666 EXTERN_C_END
1667 
1668 #undef __FUNCT__
1669 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1670 /*@C
1671    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1672    the user should preallocate the matrix storage by setting the parameters
1673    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1674    performance can be increased by more than a factor of 50.
1675 
1676    Collective on Mat
1677 
1678    Input Parameters:
1679 +  A - the matrix
1680 .  bs   - size of block
1681 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1682            submatrix  (same for all local rows)
1683 .  d_nnz - array containing the number of block nonzeros in the various block rows
1684            in the upper triangular and diagonal part of the diagonal portion of the local
1685            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave
1686            room for the diagonal entry even if it is zero.
1687 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1688            submatrix (same for all local rows).
1689 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1690            off-diagonal portion of the local submatrix (possibly different for
1691            each block row) or PETSC_NULL.
1692 
1693 
1694    Options Database Keys:
1695 .   -mat_no_unroll - uses code that does not unroll the loops in the
1696                      block calculations (much slower)
1697 .   -mat_block_size - size of the blocks to use
1698 
1699    Notes:
1700 
1701    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1702    then it must be used on all processors that share the object for that argument.
1703 
1704    Storage Information:
1705    For a square global matrix we define each processor's diagonal portion
1706    to be its local rows and the corresponding columns (a square submatrix);
1707    each processor's off-diagonal portion encompasses the remainder of the
1708    local matrix (a rectangular submatrix).
1709 
1710    The user can specify preallocated storage for the diagonal part of
1711    the local submatrix with either d_nz or d_nnz (not both).  Set
1712    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1713    memory allocation.  Likewise, specify preallocated storage for the
1714    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1715 
1716    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1717    the figure below we depict these three local rows and all columns (0-11).
1718 
1719 .vb
1720            0 1 2 3 4 5 6 7 8 9 10 11
1721           -------------------
1722    row 3  |  o o o d d d o o o o o o
1723    row 4  |  o o o d d d o o o o o o
1724    row 5  |  o o o d d d o o o o o o
1725           -------------------
1726 .ve
1727 
1728    Thus, any entries in the d locations are stored in the d (diagonal)
1729    submatrix, and any entries in the o locations are stored in the
1730    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1731    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1732 
1733    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1734    plus the diagonal part of the d matrix,
1735    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1736    In general, for PDE problems in which most nonzeros are near the diagonal,
1737    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1738    or you will get TERRIBLE performance; see the users' manual chapter on
1739    matrices.
1740 
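   As a minimal sketch (bs and the estimates 5 and 2 are placeholders chosen by the
   user; the second form instead passes per-block-row arrays d_nnz and o_nnz, each
   with one entry per local block row):
.vb
   ierr = MatMPISBAIJSetPreallocation(A,bs,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     or
   ierr = MatMPISBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
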
1741    Level: intermediate
1742 
1743 .keywords: matrix, block, aij, compressed row, sparse, parallel
1744 
1745 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1746 @*/
1747 
1748 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1749 {
1750   Mat_MPISBAIJ *b;
1751   int          ierr,i,mbs,Mbs;
1752   PetscTruth   flg2;
1753 
1754   PetscFunctionBegin;
1755   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr);
1756   if (!flg2) PetscFunctionReturn(0);
1757 
1758   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1759 
1760   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1761   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1762   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1763   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1764   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1765   if (d_nnz) {
1766     for (i=0; i<B->m/bs; i++) {
1767       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1768     }
1769   }
1770   if (o_nnz) {
1771     for (i=0; i<B->m/bs; i++) {
1772       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1773     }
1774   }
1775   B->preallocated = PETSC_TRUE;
1776   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1777   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1778   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1779   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1780 
1781   b   = (Mat_MPISBAIJ*)B->data;
1782   mbs = B->m/bs;
1783   Mbs = B->M/bs;
1784   if (mbs*bs != B->m) {
1785     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by blocksize %d",B->m,bs);
1786   }
1787 
1788   b->bs  = bs;
1789   b->bs2 = bs*bs;
1790   b->mbs = mbs;
1791   b->nbs = mbs;
1792   b->Mbs = Mbs;
1793   b->Nbs = Mbs;
1794 
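  /* gather each process's local block-row count, then convert the result into a
     prefix sum so that rowners[p]..rowners[p+1] is the block-row range owned by
     process p */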
1795   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1796   b->rowners[0]    = 0;
1797   for (i=2; i<=b->size; i++) {
1798     b->rowners[i] += b->rowners[i-1];
1799   }
1800   b->rstart    = b->rowners[b->rank];
1801   b->rend      = b->rowners[b->rank+1];
1802   b->cstart    = b->rstart;
1803   b->cend      = b->rend;
1804   for (i=0; i<=b->size; i++) {
1805     b->rowners_bs[i] = b->rowners[i]*bs;
1806   }
1807   b->rstart_bs = b->rstart*bs;
1808   b->rend_bs   = b->rend*bs;
1809 
1810   b->cstart_bs = b->cstart*bs;
1811   b->cend_bs   = b->cend*bs;
1812 
1813 
1814   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1815   PetscLogObjectParent(B,b->A);
1816   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1817   PetscLogObjectParent(B,b->B);
1818 
1819   /* build cache for off array entries formed */
1820   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1821 
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatCreateMPISBAIJ"
1827 /*@C
1828    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1829    (block compressed row).  For good matrix assembly performance
1830    the user should preallocate the matrix storage by setting the parameters
1831    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1832    performance can be increased by more than a factor of 50.
1833 
1834    Collective on MPI_Comm
1835 
1836    Input Parameters:
1837 +  comm - MPI communicator
1838 .  bs   - size of block
1839 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1840            This value should be the same as the local size used in creating the
1841            y vector for the matrix-vector product y = Ax.
1842 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1843            This value should be the same as the local size used in creating the
1844            x vector for the matrix-vector product y = Ax.
1845 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1846 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1847 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1848            submatrix  (same for all local rows)
1849 .  d_nnz - array containing the number of block nonzeros in the various block rows
1850            in the upper triangular and diagonal part of the diagonal portion of the local
1851            submatrix (possibly different for each block row) or PETSC_NULL.
1852            You must leave room for the diagonal entry even if it is zero.
1853 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1854            submatrix (same for all local rows).
1855 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1856            off-diagonal portion of the local submatrix (possibly different for
1857            each block row) or PETSC_NULL.
1858 
1859    Output Parameter:
1860 .  A - the matrix
1861 
1862    Options Database Keys:
1863 .   -mat_no_unroll - uses code that does not unroll the loops in the
1864                      block calculations (much slower)
1865 .   -mat_block_size - size of the blocks to use
1866 .   -mat_mpi - use the parallel matrix data structures even on one processor
1867                (defaults to using SeqSBAIJ format on one processor)
1868 
1869    Notes:
1870    The user MUST specify either the local or global matrix dimensions
1871    (possibly both).
1872 
1873    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1874    then it must be used on all processors that share the object for that argument.
1875 
1876    Storage Information:
1877    For a square global matrix we define each processor's diagonal portion
1878    to be its local rows and the corresponding columns (a square submatrix);
1879    each processor's off-diagonal portion encompasses the remainder of the
1880    local matrix (a rectangular submatrix).
1881 
1882    The user can specify preallocated storage for the diagonal part of
1883    the local submatrix with either d_nz or d_nnz (not both).  Set
1884    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1885    memory allocation.  Likewise, specify preallocated storage for the
1886    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1887 
1888    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1889    the figure below we depict these three local rows and all columns (0-11).
1890 
1891 .vb
1892            0 1 2 3 4 5 6 7 8 9 10 11
1893           -------------------
1894    row 3  |  o o o d d d o o o o o o
1895    row 4  |  o o o d d d o o o o o o
1896    row 5  |  o o o d d d o o o o o o
1897           -------------------
1898 .ve
1899 
1900    Thus, any entries in the d locations are stored in the d (diagonal)
1901    submatrix, and any entries in the o locations are stored in the
1902    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1903    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1904 
1905    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1906    plus the diagonal part of the d matrix,
1907    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1908    In general, for PDE problems in which most nonzeros are near the diagonal,
1909    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1910    or you will get TERRIBLE performance; see the users' manual chapter on
1911    matrices.
1912 
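   A minimal sketch of a call (m, bs and the nonzero estimates 5 and 2 are placeholders
   chosen by the user; here the global sizes are derived from the local size m):
.vb
   ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                            5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
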
1913    Level: intermediate
1914 
1915 .keywords: matrix, block, aij, compressed row, sparse, parallel
1916 
1917 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1918 @*/
1919 
1920 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1921 {
1922   int ierr,size;
1923 
1924   PetscFunctionBegin;
1925   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1926   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1927   if (size > 1) {
1928     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1929     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1930   } else {
1931     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1932     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1933   }
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 
1938 #undef __FUNCT__
1939 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1940 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1941 {
1942   Mat          mat;
1943   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1944   int          ierr,len=0;
1945 
1946   PetscFunctionBegin;
1947   *newmat       = 0;
1948   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1949   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
1950   mat->preallocated = PETSC_TRUE;
1951   a = (Mat_MPISBAIJ*)mat->data;
1952   a->bs  = oldmat->bs;
1953   a->bs2 = oldmat->bs2;
1954   a->mbs = oldmat->mbs;
1955   a->nbs = oldmat->nbs;
1956   a->Mbs = oldmat->Mbs;
1957   a->Nbs = oldmat->Nbs;
1958 
1959   a->rstart       = oldmat->rstart;
1960   a->rend         = oldmat->rend;
1961   a->cstart       = oldmat->cstart;
1962   a->cend         = oldmat->cend;
1963   a->size         = oldmat->size;
1964   a->rank         = oldmat->rank;
1965   a->donotstash   = oldmat->donotstash;
1966   a->roworiented  = oldmat->roworiented;
1967   a->rowindices   = 0;
1968   a->rowvalues    = 0;
1969   a->getrowactive = PETSC_FALSE;
1970   a->barray       = 0;
1971   a->rstart_bs    = oldmat->rstart_bs;
1972   a->rend_bs      = oldmat->rend_bs;
1973   a->cstart_bs    = oldmat->cstart_bs;
1974   a->cend_bs      = oldmat->cend_bs;
1975 
1976   /* hash table stuff */
1977   a->ht           = 0;
1978   a->hd           = 0;
1979   a->ht_size      = 0;
1980   a->ht_flag      = oldmat->ht_flag;
1981   a->ht_fact      = oldmat->ht_fact;
1982   a->ht_total_ct  = 0;
1983   a->ht_insert_ct = 0;
1984 
1985   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1986   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1987   a->cowners    = a->rowners + a->size + 2;
1988   a->rowners_bs = a->cowners + a->size + 2;
1989   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1990   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1991   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
1992   if (oldmat->colmap) {
1993 #if defined (PETSC_USE_CTABLE)
1994     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1995 #else
1996     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1997     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
1998     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
1999 #endif
2000   } else a->colmap = 0;
2001   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2002     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2003     PetscLogObjectMemory(mat,len*sizeof(int));
2004     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2005   } else a->garray = 0;
2006 
2007   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2008   PetscLogObjectParent(mat,a->lvec);
2009   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2010 
2011   PetscLogObjectParent(mat,a->Mvctx);
2012   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2013   PetscLogObjectParent(mat,a->A);
2014   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2015   PetscLogObjectParent(mat,a->B);
2016   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2017   *newmat = mat;
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 #include "petscsys.h"
2022 
2023 EXTERN_C_BEGIN
2024 #undef __FUNCT__
2025 #define __FUNCT__ "MatLoad_MPISBAIJ"
2026 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
2027 {
2028   Mat          A;
2029   int          i,nz,ierr,j,rstart,rend,fd;
2030   PetscScalar  *vals,*buf;
2031   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2032   MPI_Status   status;
2033   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2034   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2035   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2036   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2037   int          dcount,kmax,k,nzcount,tmp;
2038 
2039   PetscFunctionBegin;
2040   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2041 
2042   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2043   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2044   if (!rank) {
2045     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2046     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2047     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2048     if (header[3] < 0) {
2049       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2050     }
2051   }
2052 
2053   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2054   M = header[1]; N = header[2];
2055 
2056   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2057 
2058   /*
2059      This code adds extra rows to make sure the number of rows is
2060      divisible by the blocksize
2061   */
2062   Mbs        = M/bs;
2063   extra_rows = bs - M + bs*(Mbs);
2064   if (extra_rows == bs) extra_rows = 0;
2065   else                  Mbs++;
2066   if (extra_rows && !rank) {
2067     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2068   }
2069 
2070   /* determine ownership of all rows */
2071   mbs        = Mbs/size + ((Mbs % size) > rank);
2072   m          = mbs*bs;
2073   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2074   browners   = rowners + size + 1;
2075   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2076   rowners[0] = 0;
2077   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2078   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2079   rstart = rowners[rank];
2080   rend   = rowners[rank+1];
2081 
2082   /* distribute row lengths to all processors */
2083   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2084   if (!rank) {
2085     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2086     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2087     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2088     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2089     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2090     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2091     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2092   } else {
2093     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2094   }
2095 
2096   if (!rank) {   /* procs[0] */
2097     /* calculate the number of nonzeros on each processor */
2098     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2099     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2100     for (i=0; i<size; i++) {
2101       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2102         procsnz[i] += rowlengths[j];
2103       }
2104     }
2105     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2106 
2107     /* determine max buffer needed and allocate it */
2108     maxnz = 0;
2109     for (i=0; i<size; i++) {
2110       maxnz = PetscMax(maxnz,procsnz[i]);
2111     }
2112     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2113 
2114     /* read in my part of the matrix column indices  */
2115     nz     = procsnz[0];
2116     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2117     mycols = ibuf;
2118     if (size == 1)  nz -= extra_rows;
2119     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2120     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2121 
2122     /* read in the other processors' parts (except the last) and ship them off */
2123     for (i=1; i<size-1; i++) {
2124       nz   = procsnz[i];
2125       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2126       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2127     }
2128     /* read in the stuff for the last proc */
2129     if (size != 1) {
2130       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2131       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2132       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2133       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2134     }
2135     ierr = PetscFree(cols);CHKERRQ(ierr);
2136   } else {  /* procs[i], i>0 */
2137     /* determine buffer space needed for message */
2138     nz = 0;
2139     for (i=0; i<m; i++) {
2140       nz += locrowlens[i];
2141     }
2142     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2143     mycols = ibuf;
2144     /* receive message of column indices*/
2145     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2146     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2147     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2148   }
2149 
2150   /* loop over local rows, determining number of off diagonal entries */
2151   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2152   odlens   = dlens + (rend-rstart);
2153   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2154   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2155   masked1  = mask    + Mbs;
2156   masked2  = masked1 + Mbs;
2157   rowcount = 0; nzcount = 0;
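  /* mask[] flags the block columns already counted for the current block row; masked1[]
     and masked2[] record which diagonal / off-diagonal flags were set so they can be
     cleared before the next block row */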
2158   for (i=0; i<mbs; i++) {
2159     dcount  = 0;
2160     odcount = 0;
2161     for (j=0; j<bs; j++) {
2162       kmax = locrowlens[rowcount];
2163       for (k=0; k<kmax; k++) {
2164         tmp = mycols[nzcount++]/bs; /* block col. index */
2165         if (!mask[tmp]) {
2166           mask[tmp] = 1;
2167           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2168           else masked1[dcount++] = tmp; /* entry in diag portion */
2169         }
2170       }
2171       rowcount++;
2172     }
2173 
2174     dlens[i]  = dcount;  /* d_nzz[i] */
2175     odlens[i] = odcount; /* o_nzz[i] */
2176 
2177     /* zero out the mask elements we set */
2178     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2179     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2180   }
2181 
2182   /* create our matrix */
2183   ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat);
2184   CHKERRQ(ierr);
2185   A = *newmat;
2186   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2187 
2188   if (!rank) {
2189     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2190     /* read in my part of the matrix numerical values  */
2191     nz = procsnz[0];
2192     vals = buf;
2193     mycols = ibuf;
2194     if (size == 1)  nz -= extra_rows;
2195     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2196     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2197 
2198     /* insert into matrix */
2199     jj      = rstart*bs;
2200     for (i=0; i<m; i++) {
2201       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2202       mycols += locrowlens[i];
2203       vals   += locrowlens[i];
2204       jj++;
2205     }
2206 
2207     /* read in other processors (except the last one) and ship out */
2208     for (i=1; i<size-1; i++) {
2209       nz   = procsnz[i];
2210       vals = buf;
2211       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2212       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2213     }
2214     /* the last proc */
2215     if (size != 1){
2216       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2217       vals = buf;
2218       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2219       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2220       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2221     }
2222     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2223 
2224   } else {
2225     /* receive numeric values */
2226     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2227 
2228     /* receive message of values*/
2229     vals   = buf;
2230     mycols = ibuf;
2231     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2232     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2233     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2234 
2235     /* insert into matrix */
2236     jj      = rstart*bs;
2237     for (i=0; i<m; i++) {
2238       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2239       mycols += locrowlens[i];
2240       vals   += locrowlens[i];
2241       jj++;
2242     }
2243   }
2244 
2245   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2246   ierr = PetscFree(buf);CHKERRQ(ierr);
2247   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2248   ierr = PetscFree(rowners);CHKERRQ(ierr);
2249   ierr = PetscFree(dlens);CHKERRQ(ierr);
2250   ierr = PetscFree(mask);CHKERRQ(ierr);
2251   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2252   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2253   PetscFunctionReturn(0);
2254 }
2255 EXTERN_C_END
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2259 /*@
2260    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2261 
2262    Input Parameters:
2263 .  mat  - the matrix
2264 .  fact - factor
2265 
2266    Collective on Mat
2267 
2268    Level: advanced
2269 
2270   Notes:
2271    This can also be set by the command line option: -mat_use_hash_table fact
2272 
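   For example (1.5 is just an illustrative factor):
.vb
   -mat_use_hash_table 1.5
.ve
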
2273 .keywords: matrix, hashtable, factor, HT
2274 
2275 .seealso: MatSetOption()
2276 @*/
2277 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2278 {
2279   PetscFunctionBegin;
2280   SETERRQ(1,"Function not yet written for SBAIJ format");
2281   /* PetscFunctionReturn(0); */
2282 }
2283 
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2286 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2287 {
2288   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2289   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2290   PetscReal    atmp;
2291   PetscReal    *work,*svalues,*rvalues;
2292   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2293   int          rank,size,*rowners_bs,dest,count,source;
2294   PetscScalar  *va;
2295   MatScalar    *ba;
2296   MPI_Status   stat;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2300   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2301 
2302   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2303   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2304 
2305   bs   = a->bs;
2306   mbs  = a->mbs;
2307   Mbs  = a->Mbs;
2308   ba   = b->a;
2309   bi   = b->i;
2310   bj   = b->j;
2311   /*
2312   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2313   PetscSynchronizedFlush(PETSC_COMM_WORLD);
2314   */
2315 
2316   /* find ownerships */
2317   rowners_bs = a->rowners_bs;
2318   /*
2319   if (!rank){
2320     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2321   }
2322   */
2323 
2324   /* each proc creates an array to be distributed */
2325   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2326   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2327 
2328   /* row_max for B */
2329   if (rank != size-1){
2330     for (i=0; i<mbs; i++) {
2331       ncols = bi[1] - bi[0]; bi++;
2332       brow  = bs*i;
2333       for (j=0; j<ncols; j++){
2334         bcol = bs*(*bj);
2335         for (kcol=0; kcol<bs; kcol++){
2336           col = bcol + kcol;                 /* local col index */
2337           col += rowners_bs[rank+1];      /* global col index */
2338           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2339           for (krow=0; krow<bs; krow++){
2340             atmp = PetscAbsScalar(*ba); ba++;
2341             row = brow + krow;    /* local row index */
2342             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2343             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2344             if (work[col] < atmp) work[col] = atmp;
2345           }
2346         }
2347         bj++;
2348       }
2349     }
2350     /*
2351       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2352       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2353       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2354       */
2355 
2356     /* send values to its owners */
2357     for (dest=rank+1; dest<size; dest++){
2358       svalues = work + rowners_bs[dest];
2359       count   = rowners_bs[dest+1]-rowners_bs[dest];
2360       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2361       /*
2362       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2363       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2364       */
2365     }
2366   }
2367 
2368   /* receive values */
2369   if (rank){
2370     rvalues = work;
2371     count   = rowners_bs[rank+1]-rowners_bs[rank];
2372     for (source=0; source<rank; source++){
2373       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2374       /* process values */
2375       for (i=0; i<count; i++){
2376         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2377       }
2378       /*
2379       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2380       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2381       */
2382     }
2383   }
2384 
2385   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2386   ierr = PetscFree(work);CHKERRQ(ierr);
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatRelax_MPISBAIJ"
2392 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2393 {
2394   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2395   int            ierr,mbs=mat->mbs,bs=mat->bs;
2396   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2397   Vec            bb1;
2398 
2399   PetscFunctionBegin;
2400   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2401   if (bs > 1)
2402     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2403 
2404   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2405     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2406       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2407       its--;
2408     }
2409 
2410     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2411     while (its--){
2412 
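      /* each sweep forms bb1 = bb - (off-diagonal part of the matrix)*xx with one forward
         scatter: slvec0 = -[xx; B^T*xx] is added into slvec1 = [bb; 0], so slvec1a collects
         bb minus the lower-triangular contributions while slvec1b receives the negated
         ghost values of xx needed for the upper-triangular product below */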
2413       /* lower triangular part: slvec0b = - B^T*xx */
2414       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2415 
2416       /* copy xx into slvec0a */
2417       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2418       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2419       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2420       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2421 
2422       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2423 
2424       /* copy bb into slvec1a */
2425       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2426       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2427       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2428       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2429 
2430       /* set slvec1b = 0 */
2431       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2432 
2433       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2434       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2435       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2436       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2437 
2438       /* upper triangular part: bb1 = bb1 - B*x */
2439       /* upper triangular part: bb1 = slvec1a + B*slvec1b */
2440 
2441       /* local diagonal sweep */
2442       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2443     }
2444     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2445   } else {
2446     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2447   }
2448   PetscFunctionReturn(0);
2449 }
2450 
2451 #undef __FUNCT__
2452 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2453 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2454 {
2455   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2456   int            ierr;
2457   PetscScalar    mone=-1.0;
2458   Vec            lvec1,bb1;
2459 
2460   PetscFunctionBegin;
2461   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2462   if (mat->bs > 1)
2463     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2464 
2465   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2466     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2467       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2468       its--;
2469     }
2470 
2471     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2472     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2473     while (its--){
2474       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2475 
2476       /* lower diagonal part: bb1 = bb - B^T*xx */
2477       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2478       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2479 
2480       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2481       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2482       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2483 
2484       /* upper diagonal part: bb1 = bb1 - B*x */
2485       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2486       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2487 
2488       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2489 
2490       /* diagonal sweep */
2491       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2492     }
2493     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2494     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2495   } else {
2496     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2497   }
2498   PetscFunctionReturn(0);
2499 }
2500 
2501