xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision b4ea070f8aad0bed66186c809ab8a23e930ef111)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 extern int MatSetUpMultiply_MPISBAIJ(Mat);
8 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern int DisAssemble_MPISBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS[],int);
11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
13 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
18 extern int MatPrintHelp_SeqSBAIJ(Mat);
19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
28    converts the entries into single precision and then calls ..._MatScalar() to put them
29    into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 int MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
51   int          ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 int MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
66   int          ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
/* number of block entries by which a row's storage grows on reallocation */
#define CHUNKSIZE  10

/*
  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) - inserts (addv==INSERT_VALUES)
  or adds (addv==ADD_VALUES) one scalar entry at local scalar indices (row,col) of the
  diagonal SeqSBAIJ block A.  Relies on the locals declared by the enclosing function
  (a,aa,ai,aj,aimax,ailen,bs,bs2,rp,ap,bap,...).  When the target block row is full and
  a->nonew permits, the storage is enlarged by CHUNKSIZE block entries.
*/
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      /* binary search narrows the candidate range; a short linear scan finishes */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
        /* malloc new storage space: values, column indices and row pointers in one chunk */ \
        len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (int*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        /* bugfix: the fresh chunk holds MatScalar entries, so it must be zeroed with \
           sizeof(MatScalar); the previous sizeof(PetscScalar) over-ran the allocation \
           whenever PETSC_USE_MAT_SINGLE makes MatScalar smaller than PetscScalar */ \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        /* NOTE(review): a->nz is incremented only on this reallocation path, not on \
           every new insertion -- verify whether nz is maintained elsewhere */ \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
/* bugfix: the include guard previously tested the misspelled name
   MatSetValues_SeqBAIJ_B_Private (missing an 'S') and therefore never
   suppressed a redefinition; it now guards the macro actually defined. */
#ifndef MatSetValues_SeqSBAIJ_B_Private
/*
  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) - counterpart of the A macro for
  the off-diagonal SeqBAIJ block B: inserts or adds one scalar entry at local scalar
  indices (row,col), growing the row by CHUNKSIZE block entries when full.  Uses the
  b,ba,bi,bj,bimax,bilen,... locals of the enclosing function.
*/
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      /* binary search narrows the candidate range; a short linear scan finishes */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
        /* malloc new storage space: values, column indices and row pointers in one chunk */ \
        len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (int*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif
231 
232 #if defined(PETSC_USE_MAT_SINGLE)
233 #undef __FUNCT__
234 #define __FUNCT__ "MatSetValues_MPISBAIJ"
235 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
236 {
237   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
238   int          ierr,i,N = m*n;
239   MatScalar    *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
261   int         ierr,i,N = m*n*b->bs2;
262   MatScalar   *vsingle;
263 
264   PetscFunctionBegin;
265   if (N > b->setvalueslen) {
266     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
267     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
268     b->setvalueslen  = N;
269   }
270   vsingle = b->setvaluescopy;
271   for (i=0; i<N; i++) {
272     vsingle[i] = v[i];
273   }
274   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
280 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
281 {
282   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
283   int         ierr,i,N = m*n;
284   MatScalar   *vsingle;
285 
286   PetscFunctionBegin;
287   SETERRQ(1,"Function not yet written for SBAIJ format");
288   /* PetscFunctionReturn(0); */
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
293 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
294 {
295   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
296   int         ierr,i,N = m*n*b->bs2;
297   MatScalar   *vsingle;
298 
299   PetscFunctionBegin;
300   SETERRQ(1,"Function not yet written for SBAIJ format");
301   /* PetscFunctionReturn(0); */
302 }
303 #endif
304 
305 /* Only add/insert a(i,j) with i<=j (blocks).
306    Any a(i,j) with i>j input by user is ingored.
307 */
308 #undef __FUNCT__
309 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
310 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
311 {
312   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
313   MatScalar    value;
314   PetscTruth   roworiented = baij->roworiented;
315   int          ierr,i,j,row,col;
316   int          rstart_orig=baij->rstart_bs;
317   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
318   int          cend_orig=baij->cend_bs,bs=baij->bs;
319 
320   /* Some Variables required in the macro */
321   Mat          A = baij->A;
322   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
323   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
324   MatScalar    *aa=a->a;
325 
326   Mat          B = baij->B;
327   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
328   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
329   MatScalar    *ba=b->a;
330 
331   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
332   int          low,high,t,ridx,cidx,bs2=a->bs2;
333   MatScalar    *ap,*bap;
334 
335   /* for stash */
336   int          n_loc, *in_loc=0;
337   MatScalar    *v_loc=0;
338 
339   PetscFunctionBegin;
340 
341   if(!baij->donotstash){
342     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
343     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
344   }
345 
346   for (i=0; i<m; i++) {
347     if (im[i] < 0) continue;
348 #if defined(PETSC_USE_BOPT_g)
349     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
350 #endif
351     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
352       row = im[i] - rstart_orig;              /* local row index */
353       for (j=0; j<n; j++) {
354         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
355         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
356           col = in[j] - cstart_orig;          /* local col index */
357           brow = row/bs; bcol = col/bs;
358           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
359           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
360           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
361           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
362         } else if (in[j] < 0) continue;
363 #if defined(PETSC_USE_BOPT_g)
364         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
365 #endif
366         else {  /* off-diag entry (B) */
367           if (mat->was_assembled) {
368             if (!baij->colmap) {
369               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
370             }
371 #if defined (PETSC_USE_CTABLE)
372             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
373             col  = col - 1;
374 #else
375             col = baij->colmap[in[j]/bs] - 1;
376 #endif
377             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
378               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
379               col =  in[j];
380               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
381               B = baij->B;
382               b = (Mat_SeqBAIJ*)(B)->data;
383               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
384               ba=b->a;
385             } else col += in[j]%bs;
386           } else col = in[j];
387           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
388           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
389           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
390         }
391       }
392     } else {  /* off processor entry */
393       if (!baij->donotstash) {
394         n_loc = 0;
395         for (j=0; j<n; j++){
396           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
397           in_loc[n_loc] = in[j];
398           if (roworiented) {
399             v_loc[n_loc] = v[i*n+j];
400           } else {
401             v_loc[n_loc] = v[j*m+i];
402           }
403           n_loc++;
404         }
405         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
406       }
407     }
408   }
409 
410   if(!baij->donotstash){
411     ierr = PetscFree(in_loc);CHKERRQ(ierr);
412     ierr = PetscFree(v_loc);CHKERRQ(ierr);
413   }
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
419 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
420 {
421   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
422   const MatScalar *value;
423   MatScalar       *barray=baij->barray;
424   PetscTruth      roworiented = baij->roworiented;
425   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
426   int             rend=baij->rend,cstart=baij->cstart,stepval;
427   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
428 
429   PetscFunctionBegin;
430   if(!barray) {
431     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
432     baij->barray = barray;
433   }
434 
435   if (roworiented) {
436     stepval = (n-1)*bs;
437   } else {
438     stepval = (m-1)*bs;
439   }
440   for (i=0; i<m; i++) {
441     if (im[i] < 0) continue;
442 #if defined(PETSC_USE_BOPT_g)
443     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
444 #endif
445     if (im[i] >= rstart && im[i] < rend) {
446       row = im[i] - rstart;
447       for (j=0; j<n; j++) {
448         /* If NumCol = 1 then a copy is not required */
449         if ((roworiented) && (n == 1)) {
450           barray = (MatScalar*) v + i*bs2;
451         } else if((!roworiented) && (m == 1)) {
452           barray = (MatScalar*) v + j*bs2;
453         } else { /* Here a copy is required */
454           if (roworiented) {
455             value = v + i*(stepval+bs)*bs + j*bs;
456           } else {
457             value = v + j*(stepval+bs)*bs + i*bs;
458           }
459           for (ii=0; ii<bs; ii++,value+=stepval) {
460             for (jj=0; jj<bs; jj++) {
461               *barray++  = *value++;
462             }
463           }
464           barray -=bs2;
465         }
466 
467         if (in[j] >= cstart && in[j] < cend){
468           col  = in[j] - cstart;
469           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
470         }
471         else if (in[j] < 0) continue;
472 #if defined(PETSC_USE_BOPT_g)
473         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
474 #endif
475         else {
476           if (mat->was_assembled) {
477             if (!baij->colmap) {
478               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
479             }
480 
481 #if defined(PETSC_USE_BOPT_g)
482 #if defined (PETSC_USE_CTABLE)
483             { int data;
484               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
485               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
486             }
487 #else
488             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
489 #endif
490 #endif
491 #if defined (PETSC_USE_CTABLE)
492 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
493             col  = (col - 1)/bs;
494 #else
495             col = (baij->colmap[in[j]] - 1)/bs;
496 #endif
497             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
498               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
499               col =  in[j];
500             }
501           }
502           else col = in[j];
503           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
504         }
505       }
506     } else {
507       if (!baij->donotstash) {
508         if (roworiented) {
509           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
510         } else {
511           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
512         }
513       }
514     }
515   }
516   PetscFunctionReturn(0);
517 }
518 
/* Multiplicative (golden-ratio) hash: maps an integer key into [0,size);
   tmp is a caller-supplied floating-point temporary used by the expression. */
519 #define HASH_KEY 0.6180339887
520 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
521 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
522 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
523 #undef __FUNCT__
524 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
525 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
526 {
527   PetscFunctionBegin;
528   SETERRQ(1,"Function not yet written for SBAIJ format");
529   /* PetscFunctionReturn(0); */
530 }
531 
532 #undef __FUNCT__
533 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
534 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
535 {
536   PetscFunctionBegin;
537   SETERRQ(1,"Function not yet written for SBAIJ format");
538   /* PetscFunctionReturn(0); */
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatGetValues_MPISBAIJ"
543 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
544 {
545   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
546   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
547   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
552     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
553     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
554       row = idxm[i] - bsrstart;
555       for (j=0; j<n; j++) {
556         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
557         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
558         if (idxn[j] >= bscstart && idxn[j] < bscend){
559           col = idxn[j] - bscstart;
560           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
561         } else {
562           if (!baij->colmap) {
563             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
564           }
565 #if defined (PETSC_USE_CTABLE)
566           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
567           data --;
568 #else
569           data = baij->colmap[idxn[j]/bs]-1;
570 #endif
571           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
572           else {
573             col  = data + idxn[j]%bs;
574             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
575           }
576         }
577       }
578     } else {
579       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
580     }
581   }
582  PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatNorm_MPISBAIJ"
587 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
588 {
589   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
590   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
591   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
592   int        ierr;
593   PetscReal  sum[2],*lnorm2;
594 
595   PetscFunctionBegin;
596   if (baij->size == 1) {
597     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
598   } else {
599     if (type == NORM_FROBENIUS) {
600       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
601       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
602       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
603       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
605       /*
606       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
607       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
608       */
609       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
610       /*
611       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
612       PetscSynchronizedFlush(mat->comm); */
613 
614       *norm = sqrt(sum[0] + 2*sum[1]);
615       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
616     } else {
617       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
618     }
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 /*
624   Creates the hash table, and sets the table
625   This table is created only once.
626   If new entried need to be added to the matrix
627   then the hash table has to be destroyed and
628   recreated.
629 */
630 #undef __FUNCT__
631 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
632 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
633 {
634   PetscFunctionBegin;
635   SETERRQ(1,"Function not yet written for SBAIJ format");
636   /* PetscFunctionReturn(0); */
637 }
638 
639 #undef __FUNCT__
640 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
641 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
642 {
643   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
644   int         ierr,nstash,reallocs;
645   InsertMode  addv;
646 
647   PetscFunctionBegin;
648   if (baij->donotstash) {
649     PetscFunctionReturn(0);
650   }
651 
652   /* make sure all processors are either in INSERTMODE or ADDMODE */
653   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
654   if (addv == (ADD_VALUES|INSERT_VALUES)) {
655     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
656   }
657   mat->insertmode = addv; /* in case this processor had no cache */
658 
659   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
660   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
663   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
664   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNCT__
669 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
670 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
673   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
674   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
675   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
676   int         *row,*col,other_disassembled;
677   PetscTruth  r1,r2,r3;
678   MatScalar   *val;
679   InsertMode  addv = mat->insertmode;
680 
681   PetscFunctionBegin;
682 
683   if (!baij->donotstash) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       /*
687       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
688       PetscSynchronizedFlush(mat->comm);
689       */
690       if (!flg) break;
691 
692       for (i=0; i<n;) {
693         /* Now identify the consecutive vals belonging to the same row */
694         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
695         if (j < n) ncols = j-i;
696         else       ncols = n-i;
697         /* Now assemble all these values with a single function call */
698         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
699         i = j;
700       }
701     }
702     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
703     /* Now process the block-stash. Since the values are stashed column-oriented,
704        set the roworiented flag to column oriented, and after MatSetValues()
705        restore the original flags */
706     r1 = baij->roworiented;
707     r2 = a->roworiented;
708     r3 = b->roworiented;
709     baij->roworiented = PETSC_FALSE;
710     a->roworiented    = PETSC_FALSE;
711     b->roworiented    = PETSC_FALSE;
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n;) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
719         if (j < n) ncols = j-i;
720         else       ncols = n-i;
721         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
722         i = j;
723       }
724     }
725     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
726     baij->roworiented = r1;
727     a->roworiented    = r2;
728     b->roworiented    = r3;
729   }
730 
731   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled, if so we must
735      also disassemble ourselfs, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
741     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746 
747   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
748     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
749   }
750   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
752 
753 #if defined(PETSC_USE_BOPT_g)
754   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
755     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
756     baij->ht_total_ct  = 0;
757     baij->ht_insert_ct = 0;
758   }
759 #endif
760   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
761     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
762     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
763     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
764   }
765 
766   if (baij->rowvalues) {
767     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
768     baij->rowvalues = 0;
769   }
770 
771   PetscFunctionReturn(0);
772 }
773 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
/*
   Viewer kernel for MPISBAIJ matrices covering ASCII, draw and socket
   viewers.  For the ASCII info formats only statistics are printed and
   the routine returns early; otherwise (for a real dump/draw) the whole
   matrix is gathered into a temporary MPISBAIJ matrix whose entries all
   live on process 0, which is then viewed through the singleton viewer.
   Collective on mat's communicator.
*/
static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process statistics; nz counts from MatGetInfo are in blocks,
         hence the *bs to report scalar nonzeros */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw       draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    /* a null draw context means graphics are disabled; nothing to do */
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    /* uniprocessor: the diagonal block IS the whole matrix */
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ *Bloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Should this be the same type as mat? */
    /* process 0 owns all rows of the gathered copy; others own none */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global (scalar) row indices of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one scalar column of the block; a is stored column-oriented */
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps compressed off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
884 
885 #undef __FUNCT__
886 #define __FUNCT__ "MatView_MPISBAIJ"
887 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
888 {
889   int        ierr;
890   PetscTruth isascii,isdraw,issocket,isbinary;
891 
892   PetscFunctionBegin;
893   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
894   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
895   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
896   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
897   if (isascii || isdraw || issocket || isbinary) {
898     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
899   } else {
900     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
901   }
902   PetscFunctionReturn(0);
903 }
904 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
/*
   Releases everything owned by an MPISBAIJ matrix: the two value stashes,
   the row-ownership table, the sequential A (diagonal) and B (off-diagonal)
   blocks, the column map, ghost-value vectors and scatter contexts, and the
   assorted work arrays.  Optional members are guarded, since they may never
   have been created (e.g. before the first assembly).
*/
int MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
  /* colmap is either a hash table or a plain array depending on build option */
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  /* slvec0b/slvec1a/slvec1b share storage with slvec0/slvec1, so their
     existence is keyed off the parent vectors */
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
948 
949 #undef __FUNCT__
950 #define __FUNCT__ "MatMult_MPISBAIJ"
951 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
952 {
953   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
954   int         ierr,nt,mbs=a->mbs,bs=a->bs;
955   PetscScalar *x,*from,zero=0.0;
956 
957   PetscFunctionBegin;
958   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
959   if (nt != A->n) {
960     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
961   }
962   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
963   if (nt != A->m) {
964     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
965   }
966 
967   /* diagonal part */
968   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
969   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
970 
971   /* subdiagonal part */
972   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
973 
974   /* copy x into the vec slvec0 */
975   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
976   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
977   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
978   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
979 
980   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
981   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
982   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
983 
984   /* supperdiagonal part */
985   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
986 
987   PetscFunctionReturn(0);
988 }
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
992 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
993 {
994   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
995   int         ierr,nt;
996 
997   PetscFunctionBegin;
998   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
999   if (nt != A->n) {
1000     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1001   }
1002   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1003   if (nt != A->m) {
1004     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1005   }
1006 
1007   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1008   /* do diagonal part */
1009   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1010   /* do supperdiagonal part */
1011   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1012   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1013   /* do subdiagonal part */
1014   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1015   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1016   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1017 
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1023 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1024 {
1025   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1026   int          ierr,mbs=a->mbs,bs=a->bs;
1027   PetscScalar  *x,*from,zero=0.0;
1028 
1029   PetscFunctionBegin;
1030   /*
1031   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1032   PetscSynchronizedFlush(A->comm);
1033   */
1034   /* diagonal part */
1035   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1036   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1037 
1038   /* subdiagonal part */
1039   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1040 
1041   /* copy x into the vec slvec0 */
1042   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1043   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1044   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1045   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1046 
1047   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1048   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1049   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1050 
1051   /* supperdiagonal part */
1052   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1053 
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1059 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1060 {
1061   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1062   int        ierr;
1063 
1064   PetscFunctionBegin;
1065   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1066   /* do diagonal part */
1067   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1068   /* do supperdiagonal part */
1069   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1070   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1071 
1072   /* do subdiagonal part */
1073   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1074   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1075   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1076 
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1082 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1083 {
1084   int ierr;
1085 
1086   PetscFunctionBegin;
1087   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1093 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1094 {
1095   int ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 /*
1103   This only works correctly for square matrices where the subblock A->A is the
1104    diagonal block
1105 */
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1108 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1109 {
1110   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1111   int         ierr;
1112 
1113   PetscFunctionBegin;
1114   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1115   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatScale_MPISBAIJ"
1121 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1122 {
1123   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1124   int         ierr;
1125 
1126   PetscFunctionBegin;
1127   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1128   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1129   PetscFunctionReturn(0);
1130 }
1131 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
/*
   Returns one locally owned global row (scalar indexing, not block) by
   merging the row pieces held in the diagonal block A and the
   off-diagonal block B into a single column-sorted list.  Results point
   into per-matrix workspace (rowvalues/rowindices), so only one
   MatGetRow may be outstanding at a time; MatRestoreRow_MPISBAIJ
   releases the lock.
*/
int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int            *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* the shared workspace admits only one active row at a time */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    /* one allocation carries both the value and the index workspace */
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;  /* local row index */

  /* request only the pieces the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the
         diagonal block; those go first, then all of A, then the rest of B */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined while copying values above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values not requested: determine imark here instead */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1213 
1214 #undef __FUNCT__
1215 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1216 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1217 {
1218   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1219 
1220   PetscFunctionBegin;
1221   if (baij->getrowactive == PETSC_FALSE) {
1222     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1223   }
1224   baij->getrowactive = PETSC_FALSE;
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 #undef __FUNCT__
1229 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1230 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1231 {
1232   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1233 
1234   PetscFunctionBegin;
1235   *bs = baij->bs;
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1241 int MatZeroEntries_MPISBAIJ(Mat A)
1242 {
1243   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1244   int         ierr;
1245 
1246   PetscFunctionBegin;
1247   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1248   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1254 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1255 {
1256   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1257   Mat         A = a->A,B = a->B;
1258   int         ierr;
1259   PetscReal   isend[5],irecv[5];
1260 
1261   PetscFunctionBegin;
1262   info->block_size     = (PetscReal)a->bs;
1263   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1264   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1265   isend[3] = info->memory;  isend[4] = info->mallocs;
1266   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1267   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1268   isend[3] += info->memory;  isend[4] += info->mallocs;
1269   if (flag == MAT_LOCAL) {
1270     info->nz_used      = isend[0];
1271     info->nz_allocated = isend[1];
1272     info->nz_unneeded  = isend[2];
1273     info->memory       = isend[3];
1274     info->mallocs      = isend[4];
1275   } else if (flag == MAT_GLOBAL_MAX) {
1276     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1277     info->nz_used      = irecv[0];
1278     info->nz_allocated = irecv[1];
1279     info->nz_unneeded  = irecv[2];
1280     info->memory       = irecv[3];
1281     info->mallocs      = irecv[4];
1282   } else if (flag == MAT_GLOBAL_SUM) {
1283     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1284     info->nz_used      = irecv[0];
1285     info->nz_allocated = irecv[1];
1286     info->nz_unneeded  = irecv[2];
1287     info->memory       = irecv[3];
1288     info->mallocs      = irecv[4];
1289   } else {
1290     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1291   }
1292   info->rows_global       = (PetscReal)A->M;
1293   info->columns_global    = (PetscReal)A->N;
1294   info->rows_local        = (PetscReal)A->m;
1295   info->columns_local     = (PetscReal)A->N;
1296   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1297   info->fill_ratio_needed = 0;
1298   info->factor_mallocs    = 0;
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1304 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1305 {
1306   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1307   int         ierr;
1308 
1309   PetscFunctionBegin;
1310   switch (op) {
1311   case MAT_NO_NEW_NONZERO_LOCATIONS:
1312   case MAT_YES_NEW_NONZERO_LOCATIONS:
1313   case MAT_COLUMNS_UNSORTED:
1314   case MAT_COLUMNS_SORTED:
1315   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1316   case MAT_KEEP_ZEROED_ROWS:
1317   case MAT_NEW_NONZERO_LOCATION_ERR:
1318     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1319     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1320     break;
1321   case MAT_ROW_ORIENTED:
1322     a->roworiented = PETSC_TRUE;
1323     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1324     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1325     break;
1326   case MAT_ROWS_SORTED:
1327   case MAT_ROWS_UNSORTED:
1328   case MAT_YES_NEW_DIAGONALS:
1329     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1330     break;
1331   case MAT_COLUMN_ORIENTED:
1332     a->roworiented = PETSC_FALSE;
1333     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1334     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1335     break;
1336   case MAT_IGNORE_OFF_PROC_ENTRIES:
1337     a->donotstash = PETSC_TRUE;
1338     break;
1339   case MAT_NO_NEW_DIAGONALS:
1340     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1341   case MAT_USE_HASH_TABLE:
1342     a->ht_flag = PETSC_TRUE;
1343     break;
1344   case MAT_NOT_SYMMETRIC:
1345   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1346   case MAT_HERMITIAN:
1347     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1348   case MAT_SYMMETRIC:
1349   case MAT_STRUCTURALLY_SYMMETRIC:
1350   case MAT_NOT_HERMITIAN:
1351   case MAT_SYMMETRY_ETERNAL:
1352   case MAT_NOT_SYMMETRY_ETERNAL:
1353     break;
1354   default:
1355     SETERRQ(PETSC_ERR_SUP,"unknown option");
1356   }
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 #undef __FUNCT__
1361 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1362 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1363 {
1364   int ierr;
1365   PetscFunctionBegin;
1366   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1372 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1373 {
1374   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1375   Mat         a = baij->A,b = baij->B;
1376   int         ierr,s1,s2,s3;
1377 
1378   PetscFunctionBegin;
1379   if (ll != rr) {
1380     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1381   }
1382   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1383   if (rr) {
1384     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1385     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1386     /* Overlap communication with computation. */
1387     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1388     /*} if (ll) { */
1389     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1390     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1391     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1392     /* } */
1393   /* scale  the diagonal block */
1394   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1395 
1396   /* if (rr) { */
1397     /* Do a scatter end and then right scale the off-diagonal block */
1398     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1399     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1400   }
1401 
1402   PetscFunctionReturn(0);
1403 }
1404 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPISBAIJ"
/*
   Not implemented: zeroing a row of a symmetric-storage matrix would
   also have to zero the matching column to keep the format consistent.
*/
int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
}
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1415 int MatPrintHelp_MPISBAIJ(Mat A)
1416 {
1417   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1418   MPI_Comm    comm = A->comm;
1419   static int  called = 0;
1420   int         ierr;
1421 
1422   PetscFunctionBegin;
1423   if (!a->rank) {
1424     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1425   }
1426   if (called) {PetscFunctionReturn(0);} else called = 1;
1427   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1428   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1434 int MatSetUnfactored_MPISBAIJ(Mat A)
1435 {
1436   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1437   int         ierr;
1438 
1439   PetscFunctionBegin;
1440   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatEqual_MPISBAIJ"
1448 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1449 {
1450   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1451   Mat         a,b,c,d;
1452   PetscTruth  flg;
1453   int         ierr;
1454 
1455   PetscFunctionBegin;
1456   a = matA->A; b = matA->B;
1457   c = matB->A; d = matB->B;
1458 
1459   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1460   if (flg == PETSC_TRUE) {
1461     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1462   }
1463   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1469 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1470 {
1471   int        ierr;
1472 
1473   PetscFunctionBegin;
1474   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1480 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1481 {
1482   int        i,ierr;
1483   PetscTruth flg;
1484 
1485   for (i=0; i<n; i++) {
1486     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1487     if (!flg) {
1488       SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices");
1489     }
1490   }
1491   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 
1496 /* -------------------------------------------------------------------*/
/*
   Function dispatch table for the MPISBAIJ matrix type.  Entries are
   POSITIONAL: each slot corresponds to a fixed member of struct _MatOps
   (the /\*NN*\/ markers give the slot index), so entries must never be
   reordered; a 0 means the operation is unsupported for this type.
*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPISBAIJ,
       MatGetRow_MPISBAIJ,
       MatRestoreRow_MPISBAIJ,
       MatMult_MPISBAIJ,
/* 4*/ MatMultAdd_MPISBAIJ,
       MatMultTranspose_MPISBAIJ,
       MatMultTransposeAdd_MPISBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPISBAIJ,
       MatTranspose_MPISBAIJ,
/*15*/ MatGetInfo_MPISBAIJ,
       MatEqual_MPISBAIJ,
       MatGetDiagonal_MPISBAIJ,
       MatDiagonalScale_MPISBAIJ,
       MatNorm_MPISBAIJ,
/*20*/ MatAssemblyBegin_MPISBAIJ,
       MatAssemblyEnd_MPISBAIJ,
       0,
       MatSetOption_MPISBAIJ,
       MatZeroEntries_MPISBAIJ,
/*25*/ MatZeroRows_MPISBAIJ,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPISBAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPISBAIJ,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPISBAIJ,
       MatIncreaseOverlap_MPISBAIJ,
       MatGetValues_MPISBAIJ,
       0,
/*45*/ MatPrintHelp_MPISBAIJ,
       MatScale_MPISBAIJ,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPISBAIJ,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       MatSetUnfactored_MPISBAIJ,
       0,
       MatSetValuesBlocked_MPISBAIJ,
/*60*/ 0,
       0,
       0,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMax_MPISBAIJ,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*85*/ MatLoad_MPISBAIJ
};
1584 
1585 
1586 EXTERN_C_BEGIN
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1589 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1590 {
1591   PetscFunctionBegin;
1592   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1593   *iscopy = PETSC_FALSE;
1594   PetscFunctionReturn(0);
1595 }
1596 EXTERN_C_END
1597 
1598 EXTERN_C_BEGIN
1599 #undef __FUNCT__
1600 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1601 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1602 {
1603   Mat_MPISBAIJ *b;
1604   int          ierr,i,mbs,Mbs;
1605 
1606   PetscFunctionBegin;
1607   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1608 
1609   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1610   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1611   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1612   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1613   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1614   if (d_nnz) {
1615     for (i=0; i<B->m/bs; i++) {
1616       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1617     }
1618   }
1619   if (o_nnz) {
1620     for (i=0; i<B->m/bs; i++) {
1621       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1622     }
1623   }
1624   B->preallocated = PETSC_TRUE;
1625   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1626   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1627   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1628   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1629 
1630   b   = (Mat_MPISBAIJ*)B->data;
1631   mbs = B->m/bs;
1632   Mbs = B->M/bs;
1633   if (mbs*bs != B->m) {
1634     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
1635   }
1636 
1637   b->bs  = bs;
1638   b->bs2 = bs*bs;
1639   b->mbs = mbs;
1640   b->nbs = mbs;
1641   b->Mbs = Mbs;
1642   b->Nbs = Mbs;
1643 
1644   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1645   b->rowners[0]    = 0;
1646   for (i=2; i<=b->size; i++) {
1647     b->rowners[i] += b->rowners[i-1];
1648   }
1649   b->rstart    = b->rowners[b->rank];
1650   b->rend      = b->rowners[b->rank+1];
1651   b->cstart    = b->rstart;
1652   b->cend      = b->rend;
1653   for (i=0; i<=b->size; i++) {
1654     b->rowners_bs[i] = b->rowners[i]*bs;
1655   }
1656   b->rstart_bs = b-> rstart*bs;
1657   b->rend_bs   = b->rend*bs;
1658 
1659   b->cstart_bs = b->cstart*bs;
1660   b->cend_bs   = b->cend*bs;
1661 
1662   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1663   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1664   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1665   PetscLogObjectParent(B,b->A);
1666 
1667   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1668   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1669   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1670   PetscLogObjectParent(B,b->B);
1671 
1672   /* build cache for off array entries formed */
1673   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1674 
1675   PetscFunctionReturn(0);
1676 }
1677 EXTERN_C_END
1678 
1679 /*MC
1680    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1681    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1682 
1683    Options Database Keys:
1684 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1685 
1686   Level: beginner
1687 
1688 .seealso: MatCreateMPISBAIJ
1689 M*/
1690 
1691 EXTERN_C_BEGIN
1692 #undef __FUNCT__
1693 #define __FUNCT__ "MatCreate_MPISBAIJ"
1694 int MatCreate_MPISBAIJ(Mat B)
1695 {
1696   Mat_MPISBAIJ *b;
1697   int          ierr;
1698   PetscTruth   flg;
1699 
1700   PetscFunctionBegin;
1701 
1702   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1703   B->data = (void*)b;
1704   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1705   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1706 
1707   B->ops->destroy    = MatDestroy_MPISBAIJ;
1708   B->ops->view       = MatView_MPISBAIJ;
1709   B->mapping    = 0;
1710   B->factor     = 0;
1711   B->assembled  = PETSC_FALSE;
1712 
1713   B->insertmode = NOT_SET_VALUES;
1714   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1715   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1716 
1717   /* build local table of row and column ownerships */
1718   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1719   b->cowners    = b->rowners + b->size + 2;
1720   b->rowners_bs = b->cowners + b->size + 2;
1721   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1722 
1723   /* build cache for off array entries formed */
1724   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1725   b->donotstash  = PETSC_FALSE;
1726   b->colmap      = PETSC_NULL;
1727   b->garray      = PETSC_NULL;
1728   b->roworiented = PETSC_TRUE;
1729 
1730 #if defined(PETSC_USE_MAT_SINGLE)
1731   /* stuff for MatSetValues_XXX in single precision */
1732   b->setvalueslen     = 0;
1733   b->setvaluescopy    = PETSC_NULL;
1734 #endif
1735 
1736   /* stuff used in block assembly */
1737   b->barray       = 0;
1738 
1739   /* stuff used for matrix vector multiply */
1740   b->lvec         = 0;
1741   b->Mvctx        = 0;
1742   b->slvec0       = 0;
1743   b->slvec0b      = 0;
1744   b->slvec1       = 0;
1745   b->slvec1a      = 0;
1746   b->slvec1b      = 0;
1747   b->sMvctx       = 0;
1748 
1749   /* stuff for MatGetRow() */
1750   b->rowindices   = 0;
1751   b->rowvalues    = 0;
1752   b->getrowactive = PETSC_FALSE;
1753 
1754   /* hash table stuff */
1755   b->ht           = 0;
1756   b->hd           = 0;
1757   b->ht_size      = 0;
1758   b->ht_flag      = PETSC_FALSE;
1759   b->ht_fact      = 0;
1760   b->ht_total_ct  = 0;
1761   b->ht_insert_ct = 0;
1762 
1763   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1764   if (flg) {
1765     PetscReal fact = 1.39;
1766     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1767     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1768     if (fact <= 1.0) fact = 1.39;
1769     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1770     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1771   }
1772   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1773                                      "MatStoreValues_MPISBAIJ",
1774                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1775   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1776                                      "MatRetrieveValues_MPISBAIJ",
1777                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1778   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1779                                      "MatGetDiagonalBlock_MPISBAIJ",
1780                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1781   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1782                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1783                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1784   B->symmetric                  = PETSC_TRUE;
1785   B->structurally_symmetric     = PETSC_TRUE;
1786   B->symmetric_set              = PETSC_TRUE;
1787   B->structurally_symmetric_set = PETSC_TRUE;
1788   PetscFunctionReturn(0);
1789 }
1790 EXTERN_C_END
1791 
1792 /*MC
1793    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1794 
1795    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1796    and MATMPISBAIJ otherwise.
1797 
1798    Options Database Keys:
1799 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1800 
1801   Level: beginner
1802 
1803 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1804 M*/
1805 
1806 EXTERN_C_BEGIN
1807 #undef __FUNCT__
1808 #define __FUNCT__ "MatCreate_SBAIJ"
1809 int MatCreate_SBAIJ(Mat A) {
1810   int ierr,size;
1811 
1812   PetscFunctionBegin;
1813   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1814   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1815   if (size == 1) {
1816     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1817   } else {
1818     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1819   }
1820   PetscFunctionReturn(0);
1821 }
1822 EXTERN_C_END
1823 
1824 #undef __FUNCT__
1825 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1826 /*@C
1827    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1828    the user should preallocate the matrix storage by setting the parameters
1829    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1830    performance can be increased by more than a factor of 50.
1831 
1832    Collective on Mat
1833 
1834    Input Parameters:
1835 +  A - the matrix
.  bs   - size of block
1837 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1838            submatrix  (same for all local rows)
1839 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
1841            (possibly different for each block row) or PETSC_NULL.  You must leave room
1842            for the diagonal entry even if it is zero.
1843 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1844            submatrix (same for all local rows).
1845 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1846            off-diagonal portion of the local submatrix (possibly different for
1847            each block row) or PETSC_NULL.
1848 
1849 
1850    Options Database Keys:
1851 .   -mat_no_unroll - uses code that does not unroll the loops in the
1852                      block calculations (much slower)
1853 .   -mat_block_size - size of the blocks to use
1854 
1855    Notes:
1856 
1857    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1859 
1860    Storage Information:
1861    For a square global matrix we define each processor's diagonal portion
1862    to be its local rows and the corresponding columns (a square submatrix);
1863    each processor's off-diagonal portion encompasses the remainder of the
1864    local matrix (a rectangular submatrix).
1865 
1866    The user can specify preallocated storage for the diagonal part of
1867    the local submatrix with either d_nz or d_nnz (not both).  Set
1868    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1869    memory allocation.  Likewise, specify preallocated storage for the
1870    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1871 
1872    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1873    the figure below we depict these three local rows and all columns (0-11).
1874 
1875 .vb
1876            0 1 2 3 4 5 6 7 8 9 10 11
1877           -------------------
1878    row 3  |  o o o d d d o o o o o o
1879    row 4  |  o o o d d d o o o o o o
1880    row 5  |  o o o d d d o o o o o o
1881           -------------------
1882 .ve
1883 
1884    Thus, any entries in the d locations are stored in the d (diagonal)
1885    submatrix, and any entries in the o locations are stored in the
1886    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1887    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1888 
1889    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1890    plus the diagonal part of the d matrix,
1891    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1892    In general, for PDE problems in which most nonzeros are near the diagonal,
1893    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1894    or you will get TERRIBLE performance; see the users' manual chapter on
1895    matrices.
1896 
1897    Level: intermediate
1898 
1899 .keywords: matrix, block, aij, compressed row, sparse, parallel
1900 
1901 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1902 @*/
1903 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1904 {
1905   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1906 
1907   PetscFunctionBegin;
1908   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1909   if (f) {
1910     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1911   }
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatCreateMPISBAIJ"
1917 /*@C
1918    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1919    (block compressed row).  For good matrix assembly performance
1920    the user should preallocate the matrix storage by setting the parameters
1921    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1922    performance can be increased by more than a factor of 50.
1923 
1924    Collective on MPI_Comm
1925 
1926    Input Parameters:
1927 +  comm - MPI communicator
.  bs   - size of block
1929 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1930            This value should be the same as the local size used in creating the
1931            y vector for the matrix-vector product y = Ax.
1932 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1933            This value should be the same as the local size used in creating the
1934            x vector for the matrix-vector product y = Ax.
1935 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1936 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1937 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1938            submatrix  (same for all local rows)
1939 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
1941            (possibly different for each block block row) or PETSC_NULL.
1942            You must leave room for the diagonal entry even if it is zero.
1943 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1944            submatrix (same for all local rows).
1945 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1946            off-diagonal portion of the local submatrix (possibly different for
1947            each block row) or PETSC_NULL.
1948 
1949    Output Parameter:
1950 .  A - the matrix
1951 
1952    Options Database Keys:
1953 .   -mat_no_unroll - uses code that does not unroll the loops in the
1954                      block calculations (much slower)
1955 .   -mat_block_size - size of the blocks to use
1956 .   -mat_mpi - use the parallel matrix data structures even on one processor
1957                (defaults to using SeqBAIJ format on one processor)
1958 
1959    Notes:
1960    The user MUST specify either the local or global matrix dimensions
1961    (possibly both).
1962 
1963    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1965 
1966    Storage Information:
1967    For a square global matrix we define each processor's diagonal portion
1968    to be its local rows and the corresponding columns (a square submatrix);
1969    each processor's off-diagonal portion encompasses the remainder of the
1970    local matrix (a rectangular submatrix).
1971 
1972    The user can specify preallocated storage for the diagonal part of
1973    the local submatrix with either d_nz or d_nnz (not both).  Set
1974    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1975    memory allocation.  Likewise, specify preallocated storage for the
1976    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1977 
1978    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1979    the figure below we depict these three local rows and all columns (0-11).
1980 
1981 .vb
1982            0 1 2 3 4 5 6 7 8 9 10 11
1983           -------------------
1984    row 3  |  o o o d d d o o o o o o
1985    row 4  |  o o o d d d o o o o o o
1986    row 5  |  o o o d d d o o o o o o
1987           -------------------
1988 .ve
1989 
1990    Thus, any entries in the d locations are stored in the d (diagonal)
1991    submatrix, and any entries in the o locations are stored in the
1992    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1993    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1994 
1995    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1996    plus the diagonal part of the d matrix,
1997    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1998    In general, for PDE problems in which most nonzeros are near the diagonal,
1999    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2000    or you will get TERRIBLE performance; see the users' manual chapter on
2001    matrices.
2002 
2003    Level: intermediate
2004 
2005 .keywords: matrix, block, aij, compressed row, sparse, parallel
2006 
2007 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2008 @*/
2009 
2010 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2011 {
2012   int ierr,size;
2013 
2014   PetscFunctionBegin;
2015   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2016   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2017   if (size > 1) {
2018     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2019     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2020   } else {
2021     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2022     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2023   }
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 
2028 #undef __FUNCT__
2029 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2030 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2031 {
2032   Mat          mat;
2033   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2034   int          ierr,len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs;
2035   PetscScalar  *array;
2036 
2037   PetscFunctionBegin;
2038   *newmat       = 0;
2039   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2040   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2041 
2042   mat->factor       = matin->factor;
2043   mat->preallocated = PETSC_TRUE;
2044   mat->assembled    = PETSC_TRUE;
2045   mat->insertmode   = NOT_SET_VALUES;
2046 
2047   a = (Mat_MPISBAIJ*)mat->data;
2048   a->bs  = oldmat->bs;
2049   a->bs2 = oldmat->bs2;
2050   a->mbs = oldmat->mbs;
2051   a->nbs = oldmat->nbs;
2052   a->Mbs = oldmat->Mbs;
2053   a->Nbs = oldmat->Nbs;
2054 
2055   a->rstart       = oldmat->rstart;
2056   a->rend         = oldmat->rend;
2057   a->cstart       = oldmat->cstart;
2058   a->cend         = oldmat->cend;
2059   a->size         = oldmat->size;
2060   a->rank         = oldmat->rank;
2061   a->donotstash   = oldmat->donotstash;
2062   a->roworiented  = oldmat->roworiented;
2063   a->rowindices   = 0;
2064   a->rowvalues    = 0;
2065   a->getrowactive = PETSC_FALSE;
2066   a->barray       = 0;
2067   a->rstart_bs    = oldmat->rstart_bs;
2068   a->rend_bs      = oldmat->rend_bs;
2069   a->cstart_bs    = oldmat->cstart_bs;
2070   a->cend_bs      = oldmat->cend_bs;
2071 
2072   /* hash table stuff */
2073   a->ht           = 0;
2074   a->hd           = 0;
2075   a->ht_size      = 0;
2076   a->ht_flag      = oldmat->ht_flag;
2077   a->ht_fact      = oldmat->ht_fact;
2078   a->ht_total_ct  = 0;
2079   a->ht_insert_ct = 0;
2080 
2081   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2082   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2083   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2084   if (oldmat->colmap) {
2085 #if defined (PETSC_USE_CTABLE)
2086     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2087 #else
2088     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2089     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2090     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2091 #endif
2092   } else a->colmap = 0;
2093 
2094   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2095     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2096     PetscLogObjectMemory(mat,len*sizeof(int));
2097     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2098   } else a->garray = 0;
2099 
2100   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2101   PetscLogObjectParent(mat,a->lvec);
2102   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2103   PetscLogObjectParent(mat,a->Mvctx);
2104 
2105   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2106   PetscLogObjectParent(mat,a->slvec0);
2107   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2108   PetscLogObjectParent(mat,a->slvec1);
2109 
2110   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2111   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2112   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2113   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2114   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2115   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2116   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2117   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2118   PetscLogObjectParent(mat,a->slvec0);
2119   PetscLogObjectParent(mat,a->slvec1);
2120   PetscLogObjectParent(mat,a->slvec0b);
2121   PetscLogObjectParent(mat,a->slvec1a);
2122   PetscLogObjectParent(mat,a->slvec1b);
2123 
2124   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2125   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2126   a->sMvctx = oldmat->sMvctx;
2127   PetscLogObjectParent(mat,a->sMvctx);
2128 
2129   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2130   PetscLogObjectParent(mat,a->A);
2131   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2132   PetscLogObjectParent(mat,a->B);
2133   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2134   *newmat = mat;
2135   PetscFunctionReturn(0);
2136 }
2137 
2138 #include "petscsys.h"
2139 
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatLoad_MPISBAIJ"
/*
   MatLoad_MPISBAIJ - Loads a matrix stored in PETSc binary format into an
   MPISBAIJ matrix.  Process 0 reads the header, row lengths, column indices
   and values from the viewer's file descriptor and ships each process its
   slice with MPI_Scatterv/MPI_Send; every process then computes its
   diagonal/off-diagonal preallocation and inserts its rows.

   Phases:
     1. rank 0 reads and validates the header; dimensions are broadcast
     2. pad the global size so it is divisible by the block size
     3. compute the block-row ownership ranges (rowners/browners)
     4. scatter per-(point-)row lengths to all processes (locrowlens)
     5. rank 0 reads column indices and sends each process its chunk
     6. each process counts diagonal vs off-diagonal blocks per block row
        (dlens/odlens) using a mask over block columns
     7. create + preallocate the matrix, then repeat step 5 for the values,
        inserting rows as they arrive
*/
int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat          A;
  int          i,nz,ierr,j,rstart,rend,fd;
  PetscScalar  *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
  int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  /* block size defaults to 1 unless overridden on the command line */
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 touches the file; header = {cookie,M,N,nz-or-format-flag} */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows; rowners[] is in block rows,
     browners[] the same ranges scaled to point rows */
  mbs        = Mbs/size + ((Mbs % size) > rank);
  m          = mbs*bs;
  ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  browners   = rowners + size + 1;
  ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    /* each padding row gets a single (diagonal) entry */
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* on one process the padding entries are not on disk; append them here */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      /* padding rows get a diagonal entry at global column M+i */
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
  odlens   = dlens + (rend-rstart);
  /* mask[] marks block columns already seen in the current block row;
     masked1[]/masked2[] remember which entries to clear afterwards */
  ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1  = mask    + Mbs;
  masked2  = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  /* the file stores each row's columns in ascending order */
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    /* padding rows get 1.0 on the diagonal so the matrix stays nonsingular */
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1){
      /* note: i == size-1 here, left over from the loop above */
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);

  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
2372 
2373 #undef __FUNCT__
2374 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2375 /*@
2376    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2377 
   Input Parameters:
+  mat  - the matrix
-  fact - factor
2381 
2382    Collective on Mat
2383 
2384    Level: advanced
2385 
2386   Notes:
2387    This can also be set by the command line option: -mat_use_hash_table fact
2388 
2389 .keywords: matrix, hashtable, factor, HT
2390 
2391 .seealso: MatSetOption()
2392 @*/
2393 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2394 {
2395   PetscFunctionBegin;
2396   SETERRQ(1,"Function not yet written for SBAIJ format");
2397   /* PetscFunctionReturn(0); */
2398 }
2399 
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
/*
   MatGetRowMax_MPISBAIJ - For each row of the parallel symmetric matrix A,
   places the entry of largest absolute value into the vector v.

   Only the upper triangle is stored, so the work is done in two pieces:
     1. MatGetRowMax() on the local diagonal block a->A gives the maxima
        over the stored (diagonal-block) part of each local row.
     2. Each entry of the off-diagonal block a->B at local (row,col)
        contributes to the local row maximum va[row] AND, by symmetry,
        to the row maximum of the global row corresponding to col, which
        is owned by a higher-ranked process; those contributions are
        accumulated in 'work' and shipped to their owners with MPI.

   Not collective-safe to call with some ranks skipping it: every rank
   except 0 posts receives, and every rank except size-1 sends.
*/
int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal    atmp;
  PetscReal    *work,*svalues,*rvalues;
  int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  int          rank,size,*rowners_bs,dest,count,source;
  PetscScalar  *va;
  MatScalar    *ba;
  MPI_Status   stat;

  PetscFunctionBegin;
  /* maxima over the diagonal block; B's contributions are folded in below */
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);

  bs   = a->bs;   /* block size */
  mbs  = a->mbs;  /* number of local block rows */
  Mbs  = a->Mbs;  /* number of global block rows */
  ba   = b->a;    /* B's values, walked sequentially block by block */
  bi   = b->i;    /* B's block-row pointers */
  bj   = b->j;    /* B's block-column indices */
  /*
  PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
  PetscSynchronizedFlush(A->comm);
  */

  /* find ownerships */
  rowners_bs = a->rowners_bs;  /* row-ownership ranges in point (not block) units */
  /*
  if (!rank){
    for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
  }
  */

  /* each proc creates an array to be distributed */
  /* work[global col] accumulates the symmetric contributions destined for other ranks */
  ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  /* the last rank has an empty B (nothing to the right of its diagonal block) */
  if (rank != size-1){
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++){
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++){
          col = bcol + kcol;                 /* local col index */
          /* NOTE(review): this assumes B's local column numbering starts at
             the first column owned by rank+1 — verify against how garray
             is built for SBAIJ */
          col += rowners_bs[rank+1];      /* global col index */
          /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
          for (krow=0; krow<bs; krow++){
            atmp = PetscAbsScalar(*ba); ba++;
            row = brow + krow;    /* local row index */
            /* printf("val[%d,%d]: %g\n",row,col,atmp); */
            /* fold the entry into this rank's row maximum ... */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            /* ... and, by symmetry, into the owning rank's row 'col' */
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }
    /*
      PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
      for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
      PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
      */

    /* send values to its owners */
    /* the message tag is the sender's rank; content is the owner's slice of work */
    for (dest=rank+1; dest<size; dest++){
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
      /*
      PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
      PetscSynchronizedFlush(A->comm);
      */
    }
  }

  /* receive values */
  /* rank r expects exactly r messages (one per lower-ranked sender); order does
     not matter since each is max-reduced into va, hence MPI_ANY_SOURCE/TAG */
  if (rank){
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++){
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++){
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
      /*
      PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
      PetscSynchronizedFlush(A->comm);
      */
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2505 
2506 #undef __FUNCT__
2507 #define __FUNCT__ "MatRelax_MPISBAIJ"
2508 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2509 {
2510   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2511   int            ierr,mbs=mat->mbs,bs=mat->bs;
2512   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2513   Vec            bb1;
2514 
2515   PetscFunctionBegin;
2516   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2517   if (bs > 1)
2518     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2519 
2520   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2521     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2522       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2523       its--;
2524     }
2525 
2526     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2527     while (its--){
2528 
2529       /* lower triangular part: slvec0b = - B^T*xx */
2530       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2531 
2532       /* copy xx into slvec0a */
2533       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2534       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2535       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2536       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2537 
2538       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2539 
2540       /* copy bb into slvec1a */
2541       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2542       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2543       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2544       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2545 
2546       /* set slvec1b = 0 */
2547       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2548 
2549       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2550       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2551       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2552       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2553 
2554       /* upper triangular part: bb1 = bb1 - B*x */
2555       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2556 
2557       /* local diagonal sweep */
2558       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2559     }
2560     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2561   } else {
2562     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2563   }
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 #undef __FUNCT__
2568 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
/*
   MatRelax_MPISBAIJ_2comm - SSOR relaxation for the MPI SBAIJ format using
   the two-communication scheme (a forward scatter of xx plus a reverse
   scatter-add of the lower-triangle contributions), in contrast to the
   single-scatter scheme of MatRelax_MPISBAIJ.  Only block size 1 and the
   SOR_LOCAL_SYMMETRIC_SWEEP type are supported.

   VecScatterBegin/End pairs are deliberately split around independent
   computation to overlap communication with work — do not reorder.
*/
int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  int            ierr;
  PetscScalar    mone=-1.0;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
  if (mat->bs > 1)
    SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if ( flag & SOR_ZERO_INITIAL_GUESS ) {
      /* with a zero initial guess the first sweep needs no off-process data */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--){
      /* start gathering ghost values of xx into mat->lvec */
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      /* lvec1 = -B^T*xx, computed while the forward scatter is in flight */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);

      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      /* reverse scatter-add sends the -B^T*xx contributions back to the
         owners of the corresponding rows */
      ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      /* mat->lvec now holds the ghost values of xx; negate and multiply-add
         while the reverse scatter is in flight */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}
2616 
2617