xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 2f59403fd2df48e396e6efa2e07d12d9da4cd99d)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single-precision data structure. The wrapper MatSetValuesBlocked_MPISBAIJ()
28    converts the entries to single precision and then calls ..._MatScalar() to insert them
29    into the single-precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
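/*
   A minimal usage sketch for the two routines above (an assumption-laden
   illustration, not part of this file): MatStoreValues() requires that the
   nonzero pattern be frozen first, and both calls simply forward to the
   sequential diagonal (A) and off-diagonal (B) blocks:

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
         ... modify the matrix values, e.g. apply boundary conditions ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/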
74 
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
104       N = nrow++ - 1;  \
105       /* shift up all the later entries in this row */ \
106       for (ii=N; ii>=_i; ii--) { \
107         rp[ii+1] = rp[ii]; \
108         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
109       } \
110       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
111       rp[_i]                      = bcol;  \
112       ap[bs2*_i + bs*cidx + ridx] = value;  \
113       a_noinsert:; \
114     ailen[brow] = nrow; \
115 }
116 #ifndef MatSetValues_SeqSBAIJ_B_Private
117 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
118 { \
119     brow = row/bs;  \
120     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
121     rmax = bimax[brow]; nrow = bilen[brow]; \
122       bcol = col/bs; \
123       ridx = row % bs; cidx = col % bs; \
124       low = 0; high = nrow; \
125       while (high-low > 3) { \
126         t = (low+high)/2; \
127         if (rp[t] > bcol) high = t; \
128         else              low  = t; \
129       } \
130       for (_i=low; _i<high; _i++) { \
131         if (rp[_i] > bcol) break; \
132         if (rp[_i] == bcol) { \
133           bap  = ap +  bs2*_i + bs*cidx + ridx; \
134           if (addv == ADD_VALUES) *bap += value;  \
135           else                    *bap  = value;  \
136           goto b_noinsert; \
137         } \
138       } \
139       if (b->nonew == 1) goto b_noinsert; \
140       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
141       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       b_noinsert:; \
152     bilen[brow] = nrow; \
153 }
154 #endif
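/*
   Both macros above implement the same primitive: bracket the block column in
   a sorted row with a short bisection, then either update the bs x bs block in
   place or shift the tail of the row up by one block to make room.  A
   standalone sketch of the scalar (bs = 1) case, with illustrative names and
   memmove from <string.h>; the real macros additionally honor nonew (reject
   or silently drop new nonzeros) and grow the row storage through
   MatSeqXAIJReallocateAIJ(), whereas this sketch assumes capacity is free:

     static void insert_sorted(PetscInt *cols,PetscScalar *vals,PetscInt *len,
                               PetscInt col,PetscScalar v,InsertMode addv)
     {
       PetscInt lo = 0,hi = *len,t,i;
       while (hi - lo > 3) {
         t = (lo + hi)/2;
         if (cols[t] > col) hi = t; else lo = t;
       }
       for (i=lo; i<hi; i++) {
         if (cols[i] > col) break;
         if (cols[i] == col) {
           if (addv == ADD_VALUES) vals[i] += v; else vals[i] = v;
           return;
         }
       }
       memmove(cols+i+1,cols+i,(*len-i)*sizeof(PetscInt));
       memmove(vals+i+1,vals+i,(*len-i)*sizeof(PetscScalar));
       cols[i] = col; vals[i] = v; (*len)++;
     }
*/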
155 
156 #if defined(PETSC_USE_MAT_SINGLE)
157 #undef __FUNCT__
158 #define __FUNCT__ "MatSetValues_MPISBAIJ"
159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
160 {
161   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,N = m*n;
164   MatScalar      *vsingle;
165 
166   PetscFunctionBegin;
167   if (N > b->setvalueslen) {
168     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
169     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
170     b->setvalueslen  = N;
171   }
172   vsingle = b->setvaluescopy;
173 
174   for (i=0; i<N; i++) {
175     vsingle[i] = v[i];
176   }
177   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
184 {
185   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
186   PetscErrorCode ierr;
187   PetscInt       i,N = m*n*b->bs2;
188   MatScalar      *vsingle;
189 
190   PetscFunctionBegin;
191   if (N > b->setvalueslen) {
192     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
193     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
194     b->setvalueslen  = N;
195   }
196   vsingle = b->setvaluescopy;
197   for (i=0; i<N; i++) {
198     vsingle[i] = v[i];
199   }
200   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
201   PetscFunctionReturn(0);
202 }
203 #endif
204 
205 /* Only add/insert a(i,j) with i<=j (blocks).
206    Any a(i,j) with i>j input by the user is ignored.
207 */
208 #undef __FUNCT__
209 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
211 {
212   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
213   MatScalar      value;
214   PetscTruth     roworiented = baij->roworiented;
215   PetscErrorCode ierr;
216   PetscInt       i,j,row,col;
217   PetscInt       rstart_orig=mat->rmap.rstart;
218   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
219   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
220 
221   /* Some Variables required in the macro */
222   Mat            A = baij->A;
223   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
224   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
225   MatScalar      *aa=a->a;
226 
227   Mat            B = baij->B;
228   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
229   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
230   MatScalar     *ba=b->a;
231 
232   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
233   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
234   MatScalar     *ap,*bap;
235 
236   /* for stash */
237   PetscInt      n_loc, *in_loc=0;
238   MatScalar     *v_loc=0;
239 
240   PetscFunctionBegin;
241 
242   if(!baij->donotstash){
243     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
244     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
245   }
246 
247   for (i=0; i<m; i++) {
248     if (im[i] < 0) continue;
249 #if defined(PETSC_USE_DEBUG)
250     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
251 #endif
252     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
253       row = im[i] - rstart_orig;              /* local row index */
254       for (j=0; j<n; j++) {
255         if (im[i]/bs > in[j]/bs){
256           if (a->ignore_ltriangular){
257             continue;    /* ignore lower triangular blocks */
258           } else {
259             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
260           }
261         }
262         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
263           col = in[j] - cstart_orig;          /* local col index */
264           brow = row/bs; bcol = col/bs;
265           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
266           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
267           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
268           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
269         } else if (in[j] < 0) continue;
270 #if defined(PETSC_USE_DEBUG)
271         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
272 #endif
273         else {  /* off-diag entry (B) */
274           if (mat->was_assembled) {
275             if (!baij->colmap) {
276               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
277             }
278 #if defined (PETSC_USE_CTABLE)
279             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
280             col  = col - 1;
281 #else
282             col = baij->colmap[in[j]/bs] - 1;
283 #endif
284             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
285               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
286               col =  in[j];
287               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
288               B = baij->B;
289               b = (Mat_SeqBAIJ*)(B)->data;
290               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
291               ba=b->a;
292             } else col += in[j]%bs;
293           } else col = in[j];
294           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
295           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
296           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
297         }
298       }
299     } else {  /* off processor entry */
300       if (!baij->donotstash) {
301         n_loc = 0;
302         for (j=0; j<n; j++){
303           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
304           in_loc[n_loc] = in[j];
305           if (roworiented) {
306             v_loc[n_loc] = v[i*n+j];
307           } else {
308             v_loc[n_loc] = v[j*m+i];
309           }
310           n_loc++;
311         }
312         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
313       }
314     }
315   }
316 
317   if(!baij->donotstash){
318     ierr = PetscFree(in_loc);CHKERRQ(ierr);
319     ierr = PetscFree(v_loc);CHKERRQ(ierr);
320   }
321   PetscFunctionReturn(0);
322 }
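/*
   Usage sketch for the routine above, reached through the public
   MatSetValues() interface (indices illustrative; the matrix is assumed
   created as MATMPISBAIJ and preallocated).  Only entries whose block row is
   <= their block column may be set:

     PetscInt    i = 0,j = 5;
     PetscScalar v = 1.0;
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);

   If the assembly loop also generates lower-triangular entries, ask for them
   to be dropped instead of raising the error seen above:

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR);CHKERRQ(ierr);
*/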
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
326 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
327 {
328   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
329   const MatScalar *value;
330   MatScalar       *barray=baij->barray;
331   PetscTruth      roworiented = baij->roworiented;
332   PetscErrorCode  ierr;
333   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
334   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
335   PetscInt        cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2;
336 
337   PetscFunctionBegin;
338   if(!barray) {
339     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
340     baij->barray = barray;
341   }
342 
343   if (roworiented) {
344     stepval = (n-1)*bs;
345   } else {
346     stepval = (m-1)*bs;
347   }
348   for (i=0; i<m; i++) {
349     if (im[i] < 0) continue;
350 #if defined(PETSC_USE_DEBUG)
351     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
352 #endif
353     if (im[i] >= rstart && im[i] < rend) {
354       row = im[i] - rstart;
355       for (j=0; j<n; j++) {
356         /* If NumCol = 1 then a copy is not required */
357         if ((roworiented) && (n == 1)) {
358           barray = (MatScalar*) v + i*bs2;
359         } else if((!roworiented) && (m == 1)) {
360           barray = (MatScalar*) v + j*bs2;
361         } else { /* Here a copy is required */
362           if (roworiented) {
363             value = v + i*(stepval+bs)*bs + j*bs;
364           } else {
365             value = v + j*(stepval+bs)*bs + i*bs;
366           }
367           for (ii=0; ii<bs; ii++,value+=stepval) {
368             for (jj=0; jj<bs; jj++) {
369               *barray++  = *value++;
370             }
371           }
372           barray -=bs2;
373         }
374 
375         if (in[j] >= cstart && in[j] < cend){
376           col  = in[j] - cstart;
377           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
378         }
379         else if (in[j] < 0) continue;
380 #if defined(PETSC_USE_DEBUG)
381         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
382 #endif
383         else {
384           if (mat->was_assembled) {
385             if (!baij->colmap) {
386               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
387             }
388 
389 #if defined(PETSC_USE_DEBUG)
390 #if defined (PETSC_USE_CTABLE)
391             { PetscInt data;
392               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
393               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
394             }
395 #else
396             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
397 #endif
398 #endif
399 #if defined (PETSC_USE_CTABLE)
400             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
401             col  = (col - 1)/bs;
402 #else
403             col = (baij->colmap[in[j]] - 1)/bs;
404 #endif
405             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
406               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
407               col =  in[j];
408             }
409           }
410           else col = in[j];
411           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
412         }
413       }
414     } else {
415       if (!baij->donotstash) {
416         if (roworiented) {
417           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
418         } else {
419           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
420         }
421       }
422     }
423   }
424   PetscFunctionReturn(0);
425 }
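/*
   Usage sketch for the blocked variant above (illustrative, bs = 2): indices
   are in block units and the array holds one bs x bs block, row-oriented by
   default; as in the unblocked case, only blocks with block row <= block
   column are kept by the symmetric format:

     PetscInt    ib = 0,jb = 1;
     PetscScalar blk[4] = {1.0,2.0,3.0,4.0};
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,blk,INSERT_VALUES);CHKERRQ(ierr);
*/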
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatGetValues_MPISBAIJ"
429 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
430 {
431   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
432   PetscErrorCode ierr;
433   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
434   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
435 
436   PetscFunctionBegin;
437   for (i=0; i<m; i++) {
438     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
439     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
440     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
441       row = idxm[i] - bsrstart;
442       for (j=0; j<n; j++) {
443         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
444         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
445         if (idxn[j] >= bscstart && idxn[j] < bscend){
446           col = idxn[j] - bscstart;
447           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
448         } else {
449           if (!baij->colmap) {
450             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
451           }
452 #if defined (PETSC_USE_CTABLE)
453           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
454           data --;
455 #else
456           data = baij->colmap[idxn[j]/bs]-1;
457 #endif
458           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
459           else {
460             col  = data + idxn[j]%bs;
461             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
462           }
463         }
464       }
465     } else {
466       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
467     }
468   }
469   PetscFunctionReturn(0);
470 }
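/*
   Usage sketch: MatGetValues() on this format can only retrieve rows owned by
   the calling process (see the error branch above), so query the ownership
   range first (col = 0 is illustrative):

     PetscInt    rstart,rend,row,col = 0;
     PetscScalar v;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     row  = rstart;
     ierr = MatGetValues(mat,1,&row,1,&col,&v);CHKERRQ(ierr);
*/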
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "MatNorm_MPISBAIJ"
474 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
475 {
476   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
477   PetscErrorCode ierr;
478   PetscReal      sum[2],*lnorm2;
479 
480   PetscFunctionBegin;
481   if (baij->size == 1) {
482     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
483   } else {
484     if (type == NORM_FROBENIUS) {
485       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
486       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
487       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
488       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
489       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
490       ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
491       *norm = sqrt(sum[0] + 2*sum[1]);
492       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
493     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
494       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
495       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
496       PetscReal    *rsum,*rsum2,vabs;
497       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
498       PetscInt     brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs;
499       MatScalar    *v;
500 
501       ierr  = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
502       rsum2 = rsum + mat->cmap.N;
503       ierr  = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
504       /* Amat */
505       v = amat->a; jj = amat->j;
506       for (brow=0; brow<mbs; brow++) {
507         grow = bs*(rstart + brow);
508         nz = amat->i[brow+1] - amat->i[brow];
509         for (bcol=0; bcol<nz; bcol++){
510           gcol = bs*(rstart + *jj); jj++;
511           for (col=0; col<bs; col++){
512             for (row=0; row<bs; row++){
513               vabs = PetscAbsScalar(*v); v++;
514               rsum[gcol+col] += vabs;
515               /* non-diagonal block */
516               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
517             }
518           }
519         }
520       }
521       /* Bmat */
522       v = bmat->a; jj = bmat->j;
523       for (brow=0; brow<mbs; brow++) {
524         grow = bs*(rstart + brow);
525         nz = bmat->i[brow+1] - bmat->i[brow];
526         for (bcol=0; bcol<nz; bcol++){
527           gcol = bs*garray[*jj]; jj++;
528           for (col=0; col<bs; col++){
529             for (row=0; row<bs; row++){
530               vabs = PetscAbsScalar(*v); v++;
531               rsum[gcol+col] += vabs;
532               rsum[grow+row] += vabs;
533             }
534           }
535         }
536       }
537       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
538       *norm = 0.0;
539       for (col=0; col<mat->cmap.N; col++) {
540         if (rsum2[col] > *norm) *norm = rsum2[col];
541       }
542       ierr = PetscFree(rsum);CHKERRQ(ierr);
543     } else {
544       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
545     }
546   }
547   PetscFunctionReturn(0);
548 }
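/*
   Why sqrt(sum[0] + 2*sum[1]) in the Frobenius branch above: each process
   stores the upper triangle of its diagonal block in A (whose sequential
   SBAIJ norm is assumed to already account for the mirrored lower-triangular
   entries) and the strictly off-diagonal-block part in B.  Every stored entry
   of B stands for two entries of the symmetric matrix, itself and its
   transpose, so

     ||M||_F^2 = sum over processes of ( ||A||_F^2 + 2*||B||_F^2 )

   which is exactly what the Allreduce accumulates.
*/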
549 
550 #undef __FUNCT__
551 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
552 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
553 {
554   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
555   PetscErrorCode ierr;
556   PetscInt       nstash,reallocs;
557   InsertMode     addv;
558 
559   PetscFunctionBegin;
560   if (baij->donotstash) {
561     PetscFunctionReturn(0);
562   }
563 
564   /* make sure all processes use the same InsertMode: either INSERT_VALUES or ADD_VALUES */
565   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
566   if (addv == (ADD_VALUES|INSERT_VALUES)) {
567     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
568   }
569   mat->insertmode = addv; /* in case this processor had no cache */
570 
571   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
572   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
573   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
574   ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
575   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
576   ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
577   PetscFunctionReturn(0);
578 }
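/*
   Typical assembly sequence around the routine above (sketch; error handling
   abbreviated):

     ... many calls to MatSetValues()/MatSetValuesBlocked() ...
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Entries destined for other processes are accumulated in mat->stash and
   mat->bstash and shipped here; calling
   MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES) beforehand skips the stash
   (and this communication) when each process sets only locally owned entries.
*/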
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
582 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
583 {
584   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
585   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
586   PetscErrorCode ierr;
587   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
588   PetscInt       *row,*col,other_disassembled;
589   PetscMPIInt    n;
590   PetscTruth     r1,r2,r3;
591   MatScalar      *val;
592   InsertMode     addv = mat->insertmode;
593 
594   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
595   PetscFunctionBegin;
596 
597   if (!baij->donotstash) {
598     while (1) {
599       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
600       if (!flg) break;
601 
602       for (i=0; i<n;) {
603         /* Now identify the consecutive vals belonging to the same row */
604         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
605         if (j < n) ncols = j-i;
606         else       ncols = n-i;
607         /* Now assemble all these values with a single function call */
608         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
609         i = j;
610       }
611     }
612     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
613     /* Now process the block-stash. Since the values are stashed column-oriented,
614        set the roworiented flag to column oriented, and after MatSetValues()
615        restore the original flags */
616     r1 = baij->roworiented;
617     r2 = a->roworiented;
618     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
619     baij->roworiented = PETSC_FALSE;
620     a->roworiented    = PETSC_FALSE;
621     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
622     while (1) {
623       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
624       if (!flg) break;
625 
626       for (i=0; i<n;) {
627         /* Now identify the consecutive vals belonging to the same row */
628         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
629         if (j < n) ncols = j-i;
630         else       ncols = n-i;
631         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
632         i = j;
633       }
634     }
635     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
636     baij->roworiented = r1;
637     a->roworiented    = r2;
638     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
639   }
640 
641   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
642   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
643 
644   /* determine if any process has disassembled; if so we must
645      also disassemble ourselves, in order that we may reassemble. */
646   /*
647      if nonzero structure of submatrix B cannot change then we know that
648      no processor disassembled thus we can skip this stuff
649   */
650   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
651     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
652     if (mat->was_assembled && !other_disassembled) {
653       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
654     }
655   }
656 
657   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
658     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
659   }
660   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
661   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
662   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
663 
664   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
665   baij->rowvalues = 0;
666 
667   PetscFunctionReturn(0);
668 }
669 
670 #undef __FUNCT__
671 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
672 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
673 {
674   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
675   PetscErrorCode    ierr;
676   PetscInt          bs = mat->rmap.bs;
677   PetscMPIInt       size = baij->size,rank = baij->rank;
678   PetscTruth        iascii,isdraw;
679   PetscViewer       sviewer;
680   PetscViewerFormat format;
681 
682   PetscFunctionBegin;
683   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
684   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
685   if (iascii) {
686     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
687     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
688       MatInfo info;
689       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
690       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
691       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
692               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
693               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
694       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
695       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
696       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
697       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
698       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
699       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
700       PetscFunctionReturn(0);
701     } else if (format == PETSC_VIEWER_ASCII_INFO) {
702       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
703       PetscFunctionReturn(0);
704     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
705       PetscFunctionReturn(0);
706     }
707   }
708 
709   if (isdraw) {
710     PetscDraw       draw;
711     PetscTruth isnull;
712     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
713     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
714   }
715 
716   if (size == 1) {
717     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
718     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
719   } else {
720     /* assemble the entire matrix onto first processor. */
721     Mat         A;
722     Mat_SeqSBAIJ *Aloc;
723     Mat_SeqBAIJ *Bloc;
724     PetscInt         M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
725     MatScalar   *a;
726 
727     /* Should this be the same type as mat? */
728     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
729     if (!rank) {
730       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
731     } else {
732       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
733     }
734     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
735     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
736     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
737 
738     /* copy over the A part */
739     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
740     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
741     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
742 
743     for (i=0; i<mbs; i++) {
744       rvals[0] = mat->rmap.rstart + bs*i;
745       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
746       for (j=ai[i]; j<ai[i+1]; j++) {
747         col = mat->cmap.rstart+aj[j]*bs;
748         for (k=0; k<bs; k++) {
749           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
750           col++; a += bs;
751         }
752       }
753     }
754     /* copy over the B part */
755     Bloc = (Mat_SeqBAIJ*)baij->B->data;
756     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
757       rvals[0] = mat->rmap.rstart + bs*i;
758       rvals[0] = mat->rmap.rstart + bs;
759       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
760       for (j=ai[i]; j<ai[i+1]; j++) {
761         col = baij->garray[aj[j]]*bs;
762         for (k=0; k<bs; k++) {
763           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
764           col++; a += bs;
765         }
766       }
767     }
768     ierr = PetscFree(rvals);CHKERRQ(ierr);
769     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
770     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
771     /*
772        Everyone has to participate in the MatView() call since the graphics
773        waits are synchronized across all processes that share the PetscDraw object
774     */
775     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
776     if (!rank) {
777       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
778       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
779     }
780     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
781     ierr = MatDestroy(A);CHKERRQ(ierr);
782   }
783   PetscFunctionReturn(0);
784 }
785 
786 #undef __FUNCT__
787 #define __FUNCT__ "MatView_MPISBAIJ"
788 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
789 {
790   PetscErrorCode ierr;
791   PetscTruth     iascii,isdraw,issocket,isbinary;
792 
793   PetscFunctionBegin;
794   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
795   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
796   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
797   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
798   if (iascii || isdraw || issocket || isbinary) {
799     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
800   } else {
801     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
802   }
803   PetscFunctionReturn(0);
804 }
805 
806 #undef __FUNCT__
807 #define __FUNCT__ "MatDestroy_MPISBAIJ"
808 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
809 {
810   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
811   PetscErrorCode ierr;
812 
813   PetscFunctionBegin;
814 #if defined(PETSC_USE_LOG)
815   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
816 #endif
817   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
818   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
819   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
820   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
821 #if defined (PETSC_USE_CTABLE)
822   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
823 #else
824   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
825 #endif
826   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
827   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
828   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
829   if (baij->slvec0) {
830     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
831     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
832   }
833   if (baij->slvec1) {
834     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
835     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
836     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
837   }
838   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
839   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
840   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
841   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
842 #if defined(PETSC_USE_MAT_SINGLE)
843   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
844 #endif
845   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
846   ierr = PetscFree(baij);CHKERRQ(ierr);
847 
848   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
849   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
850   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
851   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
852   PetscFunctionReturn(0);
853 }
854 
855 #undef __FUNCT__
856 #define __FUNCT__ "MatMult_MPISBAIJ"
857 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
858 {
859   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
860   PetscErrorCode ierr;
861   PetscInt       nt,mbs=a->mbs,bs=A->rmap.bs;
862   PetscScalar    *x,*from,zero=0.0;
863 
864   PetscFunctionBegin;
865   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
866   if (nt != A->cmap.n) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
868   }
869 
870   /* diagonal part */
871   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
872   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
873 
874   /* subdiagonal part */
875   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
877   /* copy x into the vec slvec0 */
878   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
879   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
881   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
883   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
884 
886   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
890   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
892   /* superdiagonal part */
893   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
895   PetscFunctionReturn(0);
896 }
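/*
   The communication scheme above, roughly: with the local splitting
   M = A + B + B^T (A = symmetric diagonal block, B = upper off-diagonal
   block), y = M x is formed as

     slvec1a = A x_local                 (computed locally)
     slvec0  = [ x_local ; B^T x_local ]
     scatter-add slvec0 into slvec1 = [ slvec1a ; slvec1b ]
     y = slvec1a + B slvec1b

   so a single scatter on the combined vector both delivers the B^T x
   contributions to their owning processes and gathers the ghost values of x
   needed by B.  The _2comm variant below does the same with two ordinary
   scatters on lvec instead.
*/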
897 
898 #undef __FUNCT__
899 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
900 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
901 {
902   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
903   PetscErrorCode ierr;
904   PetscInt       nt;
905 
906   PetscFunctionBegin;
907   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
908   if (nt != A->cmap.n) {
909     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
910   }
911   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
912   if (nt != A->rmap.n) {
913     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
914   }
915 
916   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
917   /* do diagonal part */
918   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
919   /* do superdiagonal part */
920   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
921   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
922   /* do subdiagonal part */
923   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
924   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
925   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
926 
927   PetscFunctionReturn(0);
928 }
929 
930 #undef __FUNCT__
931 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
932 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
933 {
934   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
935   PetscErrorCode ierr;
936   PetscInt       mbs=a->mbs,bs=A->rmap.bs;
937   PetscScalar    *x,*from,zero=0.0;
938 
939   PetscFunctionBegin;
944   /* diagonal part */
945   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
946   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
947 
948   /* subdiagonal part */
949   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
950 
951   /* copy x into the vec slvec0 */
952   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
953   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
954   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
955   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
956 
957   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
958   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
959   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
960 
961   /* superdiagonal part */
962   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
963 
964   PetscFunctionReturn(0);
965 }
966 
967 #undef __FUNCT__
968 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
969 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
970 {
971   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
972   PetscErrorCode ierr;
973 
974   PetscFunctionBegin;
975   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
976   /* do diagonal part */
977   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
978   /* do superdiagonal part */
979   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
980   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
981 
982   /* do subdiagonal part */
983   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
984   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
985   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
986 
987   PetscFunctionReturn(0);
988 }
989 
990 /*
991   This only works correctly for square matrices where the subblock A->A is the
992    diagonal block
993 */
994 #undef __FUNCT__
995 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
996 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
997 {
998   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
999   PetscErrorCode ierr;
1000 
1001   PetscFunctionBegin;
1002   /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1003   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatScale_MPISBAIJ"
1009 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1010 {
1011   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1012   PetscErrorCode ierr;
1013 
1014   PetscFunctionBegin;
1015   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1016   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1022 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1023 {
1024   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1025   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1026   PetscErrorCode ierr;
1027   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1028   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1029   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1030 
1031   PetscFunctionBegin;
1032   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1033   mat->getrowactive = PETSC_TRUE;
1034 
1035   if (!mat->rowvalues && (idx || v)) {
1036     /*
1037         allocate enough space to hold information from the longest row.
1038     */
1039     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1040     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1041     PetscInt     max = 1,mbs = mat->mbs,tmp;
1042     for (i=0; i<mbs; i++) {
1043       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1044       if (max < tmp) { max = tmp; }
1045     }
1046     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1047     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1048   }
1049 
1050   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1051   lrow = row - brstart;  /* local row index */
1052 
1053   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1054   if (!v)   {pvA = 0; pvB = 0;}
1055   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1056   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1057   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1058   nztot = nzA + nzB;
1059 
1060   cmap  = mat->garray;
1061   if (v  || idx) {
1062     if (nztot) {
1063       /* Sort by increasing column numbers, assuming A and B already sorted */
1064       PetscInt imark = -1;
1065       if (v) {
1066         *v = v_p = mat->rowvalues;
1067         for (i=0; i<nzB; i++) {
1068           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1069           else break;
1070         }
1071         imark = i;
1072         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1073         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1074       }
1075       if (idx) {
1076         *idx = idx_p = mat->rowindices;
1077         if (imark > -1) {
1078           for (i=0; i<imark; i++) {
1079             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1080           }
1081         } else {
1082           for (i=0; i<nzB; i++) {
1083             if (cmap[cworkB[i]/bs] < cstart)
1084               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1085             else break;
1086           }
1087           imark = i;
1088         }
1089         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1090         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1091       }
1092     } else {
1093       if (idx) *idx = 0;
1094       if (v)   *v   = 0;
1095     }
1096   }
1097   *nz = nztot;
1098   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1099   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1105 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1106 {
1107   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1108 
1109   PetscFunctionBegin;
1110   if (!baij->getrowactive) {
1111     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1112   }
1113   baij->getrowactive = PETSC_FALSE;
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1119 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1120 {
1121   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1122   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1123 
1124   PetscFunctionBegin;
1125   aA->getrow_utriangular = PETSC_TRUE;
1126   PetscFunctionReturn(0);
1127 }
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1130 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1131 {
1132   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1133   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1134 
1135   PetscFunctionBegin;
1136   aA->getrow_utriangular = PETSC_FALSE;
1137   PetscFunctionReturn(0);
1138 }
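/*
   Usage sketch for row access on this format: because only the upper triangle
   is stored, MatGetRow() must be bracketed by the two routines above (row is
   assumed to be locally owned; error handling abbreviated):

     PetscInt    nz,*cols;
     PetscScalar *vals;
     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
         ... use the nz entries of cols/vals ...
     ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/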
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1142 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1143 {
1144   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1145   PetscErrorCode ierr;
1146 
1147   PetscFunctionBegin;
1148   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1149   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 #undef __FUNCT__
1154 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1155 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1156 {
1157   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1158   PetscErrorCode ierr;
1159 
1160   PetscFunctionBegin;
1161   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1162   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 #undef __FUNCT__
1167 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1168 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1169 {
1170   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1171   PetscErrorCode ierr;
1172 
1173   PetscFunctionBegin;
1174   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1175   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1181 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1182 {
1183   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1184   Mat            A = a->A,B = a->B;
1185   PetscErrorCode ierr;
1186   PetscReal      isend[5],irecv[5];
1187 
1188   PetscFunctionBegin;
1189   info->block_size     = (PetscReal)matin->rmap.bs;
1190   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1191   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1192   isend[3] = info->memory;  isend[4] = info->mallocs;
1193   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1194   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1195   isend[3] += info->memory;  isend[4] += info->mallocs;
1196   if (flag == MAT_LOCAL) {
1197     info->nz_used      = isend[0];
1198     info->nz_allocated = isend[1];
1199     info->nz_unneeded  = isend[2];
1200     info->memory       = isend[3];
1201     info->mallocs      = isend[4];
1202   } else if (flag == MAT_GLOBAL_MAX) {
1203     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1204     info->nz_used      = irecv[0];
1205     info->nz_allocated = irecv[1];
1206     info->nz_unneeded  = irecv[2];
1207     info->memory       = irecv[3];
1208     info->mallocs      = irecv[4];
1209   } else if (flag == MAT_GLOBAL_SUM) {
1210     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1211     info->nz_used      = irecv[0];
1212     info->nz_allocated = irecv[1];
1213     info->nz_unneeded  = irecv[2];
1214     info->memory       = irecv[3];
1215     info->mallocs      = irecv[4];
1216   } else {
1217     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1218   }
1219   info->rows_global       = (PetscReal)matin->rmap.N;
1220   info->columns_global    = (PetscReal)matin->cmap.N;
1221   info->rows_local        = (PetscReal)matin->rmap.n;
1222   info->columns_local     = (PetscReal)matin->cmap.n;
1223   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1224   info->fill_ratio_needed = 0;
1225   info->factor_mallocs    = 0;
1226   PetscFunctionReturn(0);
1227 }
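/*
   Usage sketch: querying global assembly statistics through the routine
   above (output format illustrative):

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(mat->comm,"nz used %g, mallocs %g\n",
                        (double)info.nz_used,(double)info.mallocs);CHKERRQ(ierr);
*/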
1228 
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1231 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1232 {
1233   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1234   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1235   PetscErrorCode ierr;
1236 
1237   PetscFunctionBegin;
1238   switch (op) {
1239   case MAT_NO_NEW_NONZERO_LOCATIONS:
1240   case MAT_YES_NEW_NONZERO_LOCATIONS:
1241   case MAT_COLUMNS_UNSORTED:
1242   case MAT_COLUMNS_SORTED:
1243   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1244   case MAT_KEEP_ZEROED_ROWS:
1245   case MAT_NEW_NONZERO_LOCATION_ERR:
1246     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1247     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1248     break;
1249   case MAT_ROW_ORIENTED:
1250     a->roworiented = PETSC_TRUE;
1251     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1252     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1253     break;
1254   case MAT_ROWS_SORTED:
1255   case MAT_ROWS_UNSORTED:
1256   case MAT_YES_NEW_DIAGONALS:
1257     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
1258     break;
1259   case MAT_COLUMN_ORIENTED:
1260     a->roworiented = PETSC_FALSE;
1261     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1262     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1263     break;
1264   case MAT_IGNORE_OFF_PROC_ENTRIES:
1265     a->donotstash = PETSC_TRUE;
1266     break;
1267   case MAT_NO_NEW_DIAGONALS:
1268     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1269   case MAT_USE_HASH_TABLE:
1270     a->ht_flag = PETSC_TRUE;
1271     break;
1272   case MAT_NOT_SYMMETRIC:
1273   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1274   case MAT_HERMITIAN:
1275     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1276   case MAT_SYMMETRIC:
1277   case MAT_STRUCTURALLY_SYMMETRIC:
1278   case MAT_NOT_HERMITIAN:
1279   case MAT_SYMMETRY_ETERNAL:
1280   case MAT_NOT_SYMMETRY_ETERNAL:
1281     break;
1282   case MAT_IGNORE_LOWER_TRIANGULAR:
1283     aA->ignore_ltriangular = PETSC_TRUE;
1284     break;
1285   case MAT_ERROR_LOWER_TRIANGULAR:
1286     aA->ignore_ltriangular = PETSC_FALSE;
1287     break;
1288   case MAT_GETROW_UPPERTRIANGULAR:
1289     aA->getrow_utriangular = PETSC_TRUE;
1290     break;
1291   default:
1292     SETERRQ(PETSC_ERR_SUP,"unknown option");
1293   }
1294   PetscFunctionReturn(0);
1295 }
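/*
   Example of the two-argument MatSetOption() handled above, e.g. enabling the
   hash-table assembly path:

     ierr = MatSetOption(mat,MAT_USE_HASH_TABLE);CHKERRQ(ierr);

   Options that would break symmetry (MAT_NOT_SYMMETRIC, MAT_HERMITIAN,
   MAT_NOT_STRUCTURALLY_SYMMETRIC) are rejected, since this format stores only
   the upper triangle of a symmetric matrix.
*/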
1296 
1297 #undef __FUNCT__
1298 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1299 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1300 {
1301   PetscErrorCode ierr;
1302   PetscFunctionBegin;
1303   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1309 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1310 {
1311   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1312   Mat            a=baij->A, b=baij->B;
1313   PetscErrorCode ierr;
1314   PetscInt       nv,m,n;
1315   PetscTruth     flg;
1316 
1317   PetscFunctionBegin;
1318   if (ll != rr){
1319     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1320     if (!flg)
1321       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1322   }
1323   if (!ll) PetscFunctionReturn(0);
1324 
1325   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1326   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1327 
1328   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1329   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1330 
1331   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1332 
1333   /* left diagonalscale the off-diagonal part */
1334   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1335 
1336   /* scale the diagonal part */
1337   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1338 
1339   /* right diagonalscale the off-diagonal part */
1340   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1341   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
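/*
   Usage sketch: symmetric scaling D M D, which preserves symmetry; the same
   vector must be passed for both sides (enforced above).  A vector with the
   matrix's row layout is assumed, e.g. obtained via MatGetVecs():

     Vec d;
     ierr = MatGetVecs(mat,&d,PETSC_NULL);CHKERRQ(ierr);
     ierr = VecSet(d,2.0);CHKERRQ(ierr);
     ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);
*/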
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1347 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1348 {
1349   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1350   MPI_Comm          comm = A->comm;
1351   static PetscTruth called = PETSC_FALSE;
1352   PetscErrorCode    ierr;
1353 
1354   PetscFunctionBegin;
1355   if (!a->rank) {
1356     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1357   }
1358   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1359   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1360   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1366 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1367 {
1368   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1369   PetscErrorCode ierr;
1370 
1371   PetscFunctionBegin;
1372   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1377 
1378 #undef __FUNCT__
1379 #define __FUNCT__ "MatEqual_MPISBAIJ"
1380 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1381 {
1382   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1383   Mat            a,b,c,d;
1384   PetscTruth     flg;
1385   PetscErrorCode ierr;
1386 
1387   PetscFunctionBegin;
1388   a = matA->A; b = matA->B;
1389   c = matB->A; d = matB->B;
1390 
1391   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1392   if (flg) {
1393     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1394   }
1395   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1396   PetscFunctionReturn(0);
1397 }
1398 
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatCopy_MPISBAIJ"
1401 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1402 {
1403   PetscErrorCode ierr;
1404   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1405   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1406 
1407   PetscFunctionBegin;
1408   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1409   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1410     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1411     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1412     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1413   } else {
1414     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1415     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1416   }
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1422 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1423 {
1424   PetscErrorCode ierr;
1425 
1426   PetscFunctionBegin;
1427   ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #include "petscblaslapack.h"
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1434 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1435 {
1436   PetscErrorCode ierr;
1437   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1438   PetscBLASInt   bnz,one=1;
1439   Mat_SeqSBAIJ   *xa,*ya;
1440   Mat_SeqBAIJ    *xb,*yb;
1441 
1442   PetscFunctionBegin;
1443   if (str == SAME_NONZERO_PATTERN) {
1444     PetscScalar alpha = a;
1445     xa = (Mat_SeqSBAIJ *)xx->A->data;
1446     ya = (Mat_SeqSBAIJ *)yy->A->data;
1447     bnz = (PetscBLASInt)xa->nz;
1448     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1449     xb = (Mat_SeqBAIJ *)xx->B->data;
1450     yb = (Mat_SeqBAIJ *)yy->B->data;
1451     bnz = (PetscBLASInt)xb->nz;
1452     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1453   } else {
1454     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1455     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1456     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1457   }
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1463 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1464 {
1465   PetscErrorCode ierr;
1466   PetscInt       i;
1467   PetscTruth     flg;
1468 
1469   PetscFunctionBegin;
1470   for (i=0; i<n; i++) {
1471     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1472     if (!flg) {
1473       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1474     }
1475   }
1476   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 
1481 /* -------------------------------------------------------------------*/
1482 static struct _MatOps MatOps_Values = {
1483        MatSetValues_MPISBAIJ,
1484        MatGetRow_MPISBAIJ,
1485        MatRestoreRow_MPISBAIJ,
1486        MatMult_MPISBAIJ,
1487 /* 4*/ MatMultAdd_MPISBAIJ,
1488        MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose ones (matrix is symmetric) */
1489        MatMultAdd_MPISBAIJ,
1490        0,
1491        0,
1492        0,
1493 /*10*/ 0,
1494        0,
1495        0,
1496        MatRelax_MPISBAIJ,
1497        MatTranspose_MPISBAIJ,
1498 /*15*/ MatGetInfo_MPISBAIJ,
1499        MatEqual_MPISBAIJ,
1500        MatGetDiagonal_MPISBAIJ,
1501        MatDiagonalScale_MPISBAIJ,
1502        MatNorm_MPISBAIJ,
1503 /*20*/ MatAssemblyBegin_MPISBAIJ,
1504        MatAssemblyEnd_MPISBAIJ,
1505        0,
1506        MatSetOption_MPISBAIJ,
1507        MatZeroEntries_MPISBAIJ,
1508 /*25*/ 0,
1509        0,
1510        0,
1511        0,
1512        0,
1513 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1514        0,
1515        0,
1516        0,
1517        0,
1518 /*35*/ MatDuplicate_MPISBAIJ,
1519        0,
1520        0,
1521        0,
1522        0,
1523 /*40*/ MatAXPY_MPISBAIJ,
1524        MatGetSubMatrices_MPISBAIJ,
1525        MatIncreaseOverlap_MPISBAIJ,
1526        MatGetValues_MPISBAIJ,
1527        MatCopy_MPISBAIJ,
1528 /*45*/ MatPrintHelp_MPISBAIJ,
1529        MatScale_MPISBAIJ,
1530        0,
1531        0,
1532        0,
1533 /*50*/ 0,
1534        0,
1535        0,
1536        0,
1537        0,
1538 /*55*/ 0,
1539        0,
1540        MatSetUnfactored_MPISBAIJ,
1541        0,
1542        MatSetValuesBlocked_MPISBAIJ,
1543 /*60*/ 0,
1544        0,
1545        0,
1546        0,
1547        0,
1548 /*65*/ 0,
1549        0,
1550        0,
1551        0,
1552        0,
1553 /*70*/ MatGetRowMax_MPISBAIJ,
1554        0,
1555        0,
1556        0,
1557        0,
1558 /*75*/ 0,
1559        0,
1560        0,
1561        0,
1562        0,
1563 /*80*/ 0,
1564        0,
1565        0,
1566        0,
1567        MatLoad_MPISBAIJ,
1568 /*85*/ 0,
1569        0,
1570        0,
1571        0,
1572        0,
1573 /*90*/ 0,
1574        0,
1575        0,
1576        0,
1577        0,
1578 /*95*/ 0,
1579        0,
1580        0,
1581        0,
1582        0,
1583 /*100*/0,
1584        0,
1585        0,
1586        0,
1587        0,
1588 /*105*/0,
1589        MatRealPart_MPISBAIJ,
1590        MatImaginaryPart_MPISBAIJ,
1591        MatGetRowUpperTriangular_MPISBAIJ,
1592        MatRestoreRowUpperTriangular_MPISBAIJ
1593 };
1594 
1595 
1596 EXTERN_C_BEGIN
1597 #undef __FUNCT__
1598 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1599 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1600 {
1601   PetscFunctionBegin;
1602   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1603   *iscopy = PETSC_FALSE;
1604   PetscFunctionReturn(0);
1605 }
1606 EXTERN_C_END
1607 
1608 EXTERN_C_BEGIN
1609 #undef __FUNCT__
1610 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1611 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1612 {
1613   Mat_MPISBAIJ   *b;
1614   PetscErrorCode ierr;
1615   PetscInt       i,mbs,Mbs;
1616 
1617   PetscFunctionBegin;
1618   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1619 
1620   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1621   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1622   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1623   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1624   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1625 
1626   B->rmap.bs = B->cmap.bs = bs;
1627   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1628   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1629 
1630   if (d_nnz) {
1631     for (i=0; i<B->rmap.n/bs; i++) {
1632       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1633     }
1634   }
1635   if (o_nnz) {
1636     for (i=0; i<B->rmap.n/bs; i++) {
1637       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1638     }
1639   }
1640   B->preallocated = PETSC_TRUE;
1641 
1642   b   = (Mat_MPISBAIJ*)B->data;
1643   mbs = B->rmap.n/bs;
1644   Mbs = B->rmap.N/bs;
1645   if (mbs*bs != B->rmap.n) {
1646     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap.n,bs);
1647   }
1648 
1649   B->rmap.bs  = bs;
1650   b->bs2 = bs*bs;
1651   b->mbs = mbs;
1652   b->nbs = mbs;
1653   b->Mbs = Mbs;
1654   b->Nbs = Mbs;
1655 
1656   for (i=0; i<=b->size; i++) {
1657     b->rangebs[i] = B->rmap.range[i]/bs;
1658   }
1659   b->rstartbs = B->rmap.rstart/bs;
1660   b->rendbs   = B->rmap.rend/bs;
1661 
1662   b->cstartbs = B->cmap.rstart/bs;
1663   b->cendbs   = B->cmap.rend/bs;
1664 
1665   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1666   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1667   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1668   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1669   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1670 
1671   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1672   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1673   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1674   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1675   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1676 
1677   /* build cache for off-process entries generated during assembly */
1678   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1679 
1680   PetscFunctionReturn(0);
1681 }
1682 EXTERN_C_END
1683 
1684 /*MC
1685    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1686    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1687 
1688    Options Database Keys:
1689 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1690 
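   Example:
   A minimal creation sketch (sizes and preallocation omitted; see
   MatMPISBAIJSetPreallocation()):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetType(A,MATMPISBAIJ);
.ve
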
1691   Level: beginner
1692 
1693 .seealso: MatCreateMPISBAIJ
1694 M*/
1695 
1696 EXTERN_C_BEGIN
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatCreate_MPISBAIJ"
1699 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1700 {
1701   Mat_MPISBAIJ   *b;
1702   PetscErrorCode ierr;
1703   PetscTruth     flg;
1704 
1705   PetscFunctionBegin;
1706 
1707   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1708   B->data = (void*)b;
1709   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1710 
1711   B->ops->destroy    = MatDestroy_MPISBAIJ;
1712   B->ops->view       = MatView_MPISBAIJ;
1713   B->mapping    = 0;
1714   B->factor     = 0;
1715   B->assembled  = PETSC_FALSE;
1716 
1717   B->insertmode = NOT_SET_VALUES;
1718   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1719   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1720 
1721   /* build local table of row and column ownerships */
1722   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1723 
1724   /* build cache for off-process entries generated during assembly */
1725   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1726   b->donotstash  = PETSC_FALSE;
1727   b->colmap      = PETSC_NULL;
1728   b->garray      = PETSC_NULL;
1729   b->roworiented = PETSC_TRUE;
1730 
1731 #if defined(PETSC_USE_MAT_SINGLE)
1732   /* stuff for MatSetValues_XXX in single precision */
1733   b->setvalueslen     = 0;
1734   b->setvaluescopy    = PETSC_NULL;
1735 #endif
1736 
1737   /* stuff used in block assembly */
1738   b->barray       = 0;
1739 
1740   /* stuff used for matrix vector multiply */
1741   b->lvec         = 0;
1742   b->Mvctx        = 0;
1743   b->slvec0       = 0;
1744   b->slvec0b      = 0;
1745   b->slvec1       = 0;
1746   b->slvec1a      = 0;
1747   b->slvec1b      = 0;
1748   b->sMvctx       = 0;
1749 
1750   /* stuff for MatGetRow() */
1751   b->rowindices   = 0;
1752   b->rowvalues    = 0;
1753   b->getrowactive = PETSC_FALSE;
1754 
1755   /* hash table stuff */
1756   b->ht           = 0;
1757   b->hd           = 0;
1758   b->ht_size      = 0;
1759   b->ht_flag      = PETSC_FALSE;
1760   b->ht_fact      = 0;
1761   b->ht_total_ct  = 0;
1762   b->ht_insert_ct = 0;
1763 
1764   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1765   if (flg) {
1766     PetscReal fact = 1.39;
1767     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1768     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1769     if (fact <= 1.0) fact = 1.39;
1770     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1771     ierr = PetscInfo1(0,"Hash table factor used %5.2f\n",fact);CHKERRQ(ierr);
1772   }
1773   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1774                                      "MatStoreValues_MPISBAIJ",
1775                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1776   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1777                                      "MatRetrieveValues_MPISBAIJ",
1778                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1779   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1780                                      "MatGetDiagonalBlock_MPISBAIJ",
1781                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1782   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1783                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1784                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1785   B->symmetric                  = PETSC_TRUE;
1786   B->structurally_symmetric     = PETSC_TRUE;
1787   B->symmetric_set              = PETSC_TRUE;
1788   B->structurally_symmetric_set = PETSC_TRUE;
1789   PetscFunctionReturn(0);
1790 }
1791 EXTERN_C_END
1792 
1793 /*MC
1794    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1795 
1796    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1797    and MATMPISBAIJ otherwise.
1798 
1799    Options Database Keys:
1800 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1801 
1802   Level: beginner
1803 
1804 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1805 M*/
1806 
1807 EXTERN_C_BEGIN
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatCreate_SBAIJ"
1810 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1811 {
1812   PetscErrorCode ierr;
1813   PetscMPIInt    size;
1814 
1815   PetscFunctionBegin;
1816   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1817   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1818   if (size == 1) {
1819     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1820   } else {
1821     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1822   }
1823   PetscFunctionReturn(0);
1824 }
1825 EXTERN_C_END
1826 
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1829 /*@C
1830    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1831    the user should preallocate the matrix storage by setting the parameters
1832    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1833    performance can be increased by more than a factor of 50.
1834 
1835    Collective on Mat
1836 
1837    Input Parameters:
1838 +  A - the matrix
1839 .  bs   - size of the blocks
1840 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1841            submatrix  (same for all local rows)
1842 .  d_nnz - array containing the number of block nonzeros in the various block rows
1843            in the upper triangular and diagonal part of the diagonal portion of the local
1844            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1845            for the diagonal entry even if it is zero.
1846 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1847            submatrix (same for all local rows).
1848 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1849            off-diagonal portion of the local submatrix (possibly different for
1850            each block row) or PETSC_NULL.
1851 
1852 
1853    Options Database Keys:
1854 +   -mat_no_unroll - uses code that does not unroll the loops in the
1855                      block calculations (much slower)
1856 -   -mat_block_size - size of the blocks to use
1857 
1858    Notes:
1859 
1860    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1861    then it must be used on all processors that share the object for that argument.
1862 
1863    If the *_nnz parameter is given then the *_nz parameter is ignored
1864 
1865    Storage Information:
1866    For a square global matrix we define each processor's diagonal portion
1867    to be its local rows and the corresponding columns (a square submatrix);
1868    each processor's off-diagonal portion encompasses the remainder of the
1869    local matrix (a rectangular submatrix).
1870 
1871    The user can specify preallocated storage for the diagonal part of
1872    the local submatrix with either d_nz or d_nnz (not both).  Set
1873    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1874    memory allocation.  Likewise, specify preallocated storage for the
1875    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1876 
1877    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1878    the figure below we depict these three local rows and all columns (0-11).
1879 
1880 .vb
1881            0 1 2 3 4 5 6 7 8 9 10 11
1882           -------------------
1883    row 3  |  o o o d d d o o o o o o
1884    row 4  |  o o o d d d o o o o o o
1885    row 5  |  o o o d d d o o o o o o
1886           -------------------
1887 .ve
1888 
1889    Thus, any entries in the d locations are stored in the d (diagonal)
1890    submatrix, and any entries in the o locations are stored in the
1891    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1892    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1893 
1894    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1895    plus the diagonal part of the d matrix,
1896    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1897    In general, for PDE problems in which most nonzeros are near the diagonal,
1898    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1899    or you will get TERRIBLE performance; see the users' manual chapter on
1900    matrices.
1901 
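   Example:
   A sketch preallocating the three block rows depicted above with bs=1
   (the counts are illustrative; only the upper triangle of the d part is
   counted):
.vb
   PetscInt d_nnz[] = {3,2,1};   /* row 3: cols 3,4,5; row 4: cols 4,5; row 5: col 5 */
   PetscInt o_nnz[] = {6,6,6};   /* cols 6-11; cols 0-2 lie in the lower triangle */
   MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);
.ve
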
1902    Level: intermediate
1903 
1904 .keywords: matrix, block, aij, compressed row, sparse, parallel
1905 
1906 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1907 @*/
1908 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1909 {
1910   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1911 
1912   PetscFunctionBegin;
1913   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1914   if (f) {
1915     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1916   }
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatCreateMPISBAIJ"
1922 /*@C
1923    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1924    (block compressed row).  For good matrix assembly performance
1925    the user should preallocate the matrix storage by setting the parameters
1926    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1927    performance can be increased by more than a factor of 50.
1928 
1929    Collective on MPI_Comm
1930 
1931    Input Parameters:
1932 +  comm - MPI communicator
1933 .  bs   - size of the blocks
1934 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1935            This value should be the same as the local size used in creating the
1936            y vector for the matrix-vector product y = Ax.
1937 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1938            This value should be the same as the local size used in creating the
1939            x vector for the matrix-vector product y = Ax.
1940 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1941 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1942 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1943            submatrix  (same for all local rows)
1944 .  d_nnz - array containing the number of block nonzeros in the various block rows
1945            in the upper triangular portion of the diagonal portion of the local submatrix
1946            (possibly different for each block row) or PETSC_NULL.
1947            You must leave room for the diagonal entry even if it is zero.
1948 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1949            submatrix (same for all local rows).
1950 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1951            off-diagonal portion of the local submatrix (possibly different for
1952            each block row) or PETSC_NULL.
1953 
1954    Output Parameter:
1955 .  A - the matrix
1956 
1957    Options Database Keys:
1958 +   -mat_no_unroll - uses code that does not unroll the loops in the
1959                      block calculations (much slower)
1960 .   -mat_block_size - size of the blocks to use
1961 -   -mat_mpi - use the parallel matrix data structures even on one processor
1962                (defaults to using SeqSBAIJ format on one processor)
1963 
1964    Notes:
1965    The number of rows and columns must be divisible by blocksize.
1966 
1967    The user MUST specify either the local or global matrix dimensions
1968    (possibly both).
1969 
1970    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1971    then it must be used on all processors that share the object for that argument.
1972 
1973    If the *_nnz parameter is given then the *_nz parameter is ignored
1974 
1975    Storage Information:
1976    For a square global matrix we define each processor's diagonal portion
1977    to be its local rows and the corresponding columns (a square submatrix);
1978    each processor's off-diagonal portion encompasses the remainder of the
1979    local matrix (a rectangular submatrix).
1980 
1981    The user can specify preallocated storage for the diagonal part of
1982    the local submatrix with either d_nz or d_nnz (not both).  Set
1983    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1984    memory allocation.  Likewise, specify preallocated storage for the
1985    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1986 
1987    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1988    the figure below we depict these three local rows and all columns (0-11).
1989 
1990 .vb
1991            0 1 2 3 4 5 6 7 8 9 10 11
1992           -------------------
1993    row 3  |  o o o d d d o o o o o o
1994    row 4  |  o o o d d d o o o o o o
1995    row 5  |  o o o d d d o o o o o o
1996           -------------------
1997 .ve
1998 
1999    Thus, any entries in the d locations are stored in the d (diagonal)
2000    submatrix, and any entries in the o locations are stored in the
2001    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2002    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2003 
2004    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2005    plus the diagonal part of the d matrix,
2006    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2007    In general, for PDE problems in which most nonzeros are near the diagonal,
2008    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2009    or you will get TERRIBLE performance; see the users' manual chapter on
2010    matrices.
2011 
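   Example:
   A sketch with block size 2 and uniform preallocation (M is an assumed
   global dimension, divisible by the block size; the counts are
   illustrative):
.vb
   Mat A;
   MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                     3,PETSC_NULL,1,PETSC_NULL,&A);
.ve
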
2012    Level: intermediate
2013 
2014 .keywords: matrix, block, aij, compressed row, sparse, parallel
2015 
2016 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2017 @*/
2018 
2019 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2020 {
2021   PetscErrorCode ierr;
2022   PetscMPIInt    size;
2023 
2024   PetscFunctionBegin;
2025   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2026   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2027   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2028   if (size > 1) {
2029     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2030     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2031   } else {
2032     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2033     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2034   }
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 
2039 #undef __FUNCT__
2040 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2041 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2042 {
2043   Mat            mat;
2044   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2045   PetscErrorCode ierr;
2046   PetscInt       len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs;
2047   PetscScalar    *array;
2048 
2049   PetscFunctionBegin;
2050   *newmat       = 0;
2051   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2052   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2053   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2054   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2055   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2056   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2057 
2058   mat->factor       = matin->factor;
2059   mat->preallocated = PETSC_TRUE;
2060   mat->assembled    = PETSC_TRUE;
2061   mat->insertmode   = NOT_SET_VALUES;
2062 
2063   a = (Mat_MPISBAIJ*)mat->data;
2064   a->bs2   = oldmat->bs2;
2065   a->mbs   = oldmat->mbs;
2066   a->nbs   = oldmat->nbs;
2067   a->Mbs   = oldmat->Mbs;
2068   a->Nbs   = oldmat->Nbs;
2069 
2070 
2071   a->size         = oldmat->size;
2072   a->rank         = oldmat->rank;
2073   a->donotstash   = oldmat->donotstash;
2074   a->roworiented  = oldmat->roworiented;
2075   a->rowindices   = 0;
2076   a->rowvalues    = 0;
2077   a->getrowactive = PETSC_FALSE;
2078   a->barray       = 0;
2079   a->rstartbs    = oldmat->rstartbs;
2080   a->rendbs      = oldmat->rendbs;
2081   a->cstartbs    = oldmat->cstartbs;
2082   a->cendbs      = oldmat->cendbs;
2083 
2084   /* hash table stuff */
2085   a->ht           = 0;
2086   a->hd           = 0;
2087   a->ht_size      = 0;
2088   a->ht_flag      = oldmat->ht_flag;
2089   a->ht_fact      = oldmat->ht_fact;
2090   a->ht_total_ct  = 0;
2091   a->ht_insert_ct = 0;
2092 
2093   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2094   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2095   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2096   if (oldmat->colmap) {
2097 #if defined (PETSC_USE_CTABLE)
2098     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2099 #else
2100     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2101     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2102     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2103 #endif
2104   } else a->colmap = 0;
2105 
2106   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2107     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2108     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2109     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2110   } else a->garray = 0;
2111 
2112   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2113   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2114   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2115   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2116 
2117   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2118   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2119   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2120   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2121 
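  /* slvec1a and slvec1b are windows into slvec1's storage: the first bs*mbs
     entries (the local part) and the remaining nt-bs*mbs ghost entries */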
2122   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2123   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2124   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2125   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2126   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2127   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2128   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2129   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2130   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2131   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2132   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2133   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2134   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2135 
2136   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2137   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2138   a->sMvctx = oldmat->sMvctx;
2139   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2140 
2141   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2142   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2143   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2144   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2145   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);   /* copy the composed-function list from the old matrix */
2146   *newmat = mat;
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 #include "petscsys.h"
2151 
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatLoad_MPISBAIJ"
2154 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2155 {
2156   Mat            A;
2157   PetscErrorCode ierr;
2158   PetscInt       i,nz,j,rstart,rend;
2159   PetscScalar    *vals,*buf;
2160   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2161   MPI_Status     status;
2162   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
2163   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2164   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2165   PetscInt       bs=1,Mbs,mbs,extra_rows;
2166   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2167   PetscInt       dcount,kmax,k,nzcount,tmp;
2168   int            fd;
2169 
2170   PetscFunctionBegin;
2171   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2172 
2173   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2174   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2175   if (!rank) {
2176     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2177     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2178     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2179     if (header[3] < 0) {
2180       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2181     }
2182   }
2183 
2184   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2185   M = header[1]; N = header[2];
2186 
2187   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2188 
2189   /*
2190      This code adds extra rows to make sure the number of rows is
2191      divisible by the blocksize
2192   */
2193   Mbs        = M/bs;
2194   extra_rows = bs - M + bs*(Mbs);
2195   if (extra_rows == bs) extra_rows = 0;
2196   else                  Mbs++;
2197   if (extra_rows && !rank) {
2198     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2199   }
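  /* worked example (illustrative): M=10, bs=4 gives Mbs=2 and
     extra_rows=4-10+4*2=2, so Mbs becomes 3 and the matrix is padded
     to 12 rows */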
2200 
2201   /* determine ownership of all rows */
2202   mbs        = Mbs/size + ((Mbs % size) > rank);
2203   m          = mbs*bs;
2204   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2205   browners   = rowners + size + 1;
2206   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2207   rowners[0] = 0;
2208   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2209   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2210   rstart = rowners[rank];
2211   rend   = rowners[rank+1];
2212 
2213   /* distribute row lengths to all processors */
2214   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2215   if (!rank) {
2216     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2217     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2218     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2219     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2220     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2221     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2222     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2223   } else {
2224     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2225   }
2226 
2227   if (!rank) {   /* procs[0] */
2228     /* calculate the number of nonzeros on each processor */
2229     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2230     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2231     for (i=0; i<size; i++) {
2232       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2233         procsnz[i] += rowlengths[j];
2234       }
2235     }
2236     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2237 
2238     /* determine max buffer needed and allocate it */
2239     maxnz = 0;
2240     for (i=0; i<size; i++) {
2241       maxnz = PetscMax(maxnz,procsnz[i]);
2242     }
2243     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2244 
2245     /* read in my part of the matrix column indices  */
2246     nz     = procsnz[0];
2247     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2248     mycols = ibuf;
2249     if (size == 1)  nz -= extra_rows;
2250     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2251     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2252 
2253     /* read in everyone else's part (except the last proc's) and ship it off */
2254     for (i=1; i<size-1; i++) {
2255       nz   = procsnz[i];
2256       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2257       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2258     }
2259     /* read in the stuff for the last proc */
2260     if (size != 1) {
2261       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2262       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2263       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2264       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2265     }
2266     ierr = PetscFree(cols);CHKERRQ(ierr);
2267   } else {  /* procs[i], i>0 */
2268     /* determine buffer space needed for message */
2269     nz = 0;
2270     for (i=0; i<m; i++) {
2271       nz += locrowlens[i];
2272     }
2273     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2274     mycols = ibuf;
2275     /* receive message of column indices*/
2276     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2277     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2278     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2279   }
2280 
2281   /* loop over local rows, determining the number of off-diagonal entries */
2282   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2283   odlens   = dlens + (rend-rstart);
2284   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2285   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2286   masked1  = mask    + Mbs;
2287   masked2  = masked1 + Mbs;
2288   rowcount = 0; nzcount = 0;
2289   for (i=0; i<mbs; i++) {
2290     dcount  = 0;
2291     odcount = 0;
2292     for (j=0; j<bs; j++) {
2293       kmax = locrowlens[rowcount];
2294       for (k=0; k<kmax; k++) {
2295         tmp = mycols[nzcount++]/bs; /* block col. index */
2296         if (!mask[tmp]) {
2297           mask[tmp] = 1;
2298           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2299           else masked1[dcount++] = tmp; /* entry in diag portion */
2300         }
2301       }
2302       rowcount++;
2303     }
2304 
2305     dlens[i]  = dcount;  /* d_nzz[i] */
2306     odlens[i] = odcount; /* o_nzz[i] */
2307 
2308     /* zero out the mask elements we set */
2309     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2310     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2311   }
2312 
2313   /* create our matrix */
2314   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2315   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2316   ierr = MatSetType(A,type);CHKERRQ(ierr);
2317   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2318   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2319 
2320   if (!rank) {
2321     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2322     /* read in my part of the matrix numerical values  */
2323     nz = procsnz[0];
2324     vals = buf;
2325     mycols = ibuf;
2326     if (size == 1)  nz -= extra_rows;
2327     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2328     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2329 
2330     /* insert into matrix */
2331     jj      = rstart*bs;
2332     for (i=0; i<m; i++) {
2333       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2334       mycols += locrowlens[i];
2335       vals   += locrowlens[i];
2336       jj++;
2337     }
2338 
2339     /* read in the other processors' parts (except the last one's) and ship them out */
2340     for (i=1; i<size-1; i++) {
2341       nz   = procsnz[i];
2342       vals = buf;
2343       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2344       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2345     }
2346     /* the last proc */
2347     if (size != 1){
2348       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2349       vals = buf;
2350       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2351       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2352       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2353     }
2354     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2355 
2356   } else {
2357     /* receive numeric values */
2358     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2359 
2360     /* receive message of values*/
2361     vals   = buf;
2362     mycols = ibuf;
2363     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2364     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2365     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2366 
2367     /* insert into matrix */
2368     jj      = rstart*bs;
2369     for (i=0; i<m; i++) {
2370       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2371       mycols += locrowlens[i];
2372       vals   += locrowlens[i];
2373       jj++;
2374     }
2375   }
2376 
2377   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2378   ierr = PetscFree(buf);CHKERRQ(ierr);
2379   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2380   ierr = PetscFree(rowners);CHKERRQ(ierr);
2381   ierr = PetscFree(dlens);CHKERRQ(ierr);
2382   ierr = PetscFree(mask);CHKERRQ(ierr);
2383   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2384   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2385   *newmat = A;
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2391 /*XXXXX@
2392    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2393 
2394    Input Parameters:
2395 .  mat  - the matrix
2396 .  fact - factor
2397 
2398    Collective on Mat
2399 
2400    Level: advanced
2401 
2402   Notes:
2403    This can also be set by the command line option: -mat_use_hash_table fact
2404 
2405 .keywords: matrix, hashtable, factor, HT
2406 
2407 .seealso: MatSetOption()
2408 @XXXXX*/
2409 
2410 
2411 #undef __FUNCT__
2412 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2413 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2414 {
2415   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2416   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2417   PetscReal      atmp;
2418   PetscReal      *work,*svalues,*rvalues;
2419   PetscErrorCode ierr;
2420   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2421   PetscMPIInt    rank,size;
2422   PetscInt       *rowners_bs,dest,count,source;
2423   PetscScalar    *va;
2424   MatScalar      *ba;
2425   MPI_Status     stat;
2426 
2427   PetscFunctionBegin;
2428   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2429   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2430 
2431   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2432   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2433 
2434   bs   = A->rmap.bs;
2435   mbs  = a->mbs;
2436   Mbs  = a->Mbs;
2437   ba   = b->a;
2438   bi   = b->i;
2439   bj   = b->j;
2440 
2441   /* find ownerships */
2442   rowners_bs = A->rmap.range;
2443 
2444   /* each proc creates an array to be distributed */
2445   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2446   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2447 
2448   /* row_max for B: since only the upper triangle is stored, entries of B also contribute to row maxima owned by later processes; accumulate them in work[] and send below */
2449   if (rank != size-1){
2450     for (i=0; i<mbs; i++) {
2451       ncols = bi[1] - bi[0]; bi++;
2452       brow  = bs*i;
2453       for (j=0; j<ncols; j++){
2454         bcol = bs*(*bj);
2455         for (kcol=0; kcol<bs; kcol++){
2456           col = bcol + kcol;                 /* local col index */
2457           col += rowners_bs[rank+1];      /* global col index */
2458           for (krow=0; krow<bs; krow++){
2459             atmp = PetscAbsScalar(*ba); ba++;
2460             row = brow + krow;    /* local row index */
2461             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2462             if (work[col] < atmp) work[col] = atmp;
2463           }
2464         }
2465         bj++;
2466       }
2467     }
2468 
2469     /* send values to its owners */
2470     for (dest=rank+1; dest<size; dest++){
2471       svalues = work + rowners_bs[dest];
2472       count   = rowners_bs[dest+1]-rowners_bs[dest];
2473       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2474     }
2475   }
2476 
2477   /* receive values */
2478   if (rank){
2479     rvalues = work;
2480     count   = rowners_bs[rank+1]-rowners_bs[rank];
2481     for (source=0; source<rank; source++){
2482       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2483       /* process values */
2484       for (i=0; i<count; i++){
2485         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2486       }
2487     }
2488   }
2489 
2490   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2491   ierr = PetscFree(work);CHKERRQ(ierr);
2492   PetscFunctionReturn(0);
2493 }
2494 
2495 #undef __FUNCT__
2496 #define __FUNCT__ "MatRelax_MPISBAIJ"
2497 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2498 {
2499   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2500   PetscErrorCode ierr;
2501   PetscInt       mbs=mat->mbs,bs=matin->rmap.bs;
2502   PetscScalar    *x,*b,*ptr,zero=0.0;
2503   Vec            bb1;
2504 
2505   PetscFunctionBegin;
2506   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2507   if (bs > 1)
2508     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2509 
2510   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2511     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2512       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2513       its--;
2514     }
2515 
2516     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
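    /* each sweep below assembles bb1 = bb - (off-block-diagonal part of A)*xx
       through the single symmetric scatter sMvctx, then applies a local
       symmetric SOR sweep to the block-diagonal part */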
2517     while (its--){
2518 
2519       /* lower triangular part: slvec0b = B^T*xx, negated below together with slvec0a */
2520       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2521 
2522       /* copy xx into slvec0a */
2523       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2524       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2525       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2526       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2527 
2528       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2529 
2530       /* copy bb into slvec1a */
2531       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2532       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2533       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2534       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2535 
2536       /* set slvec1b = 0 */
2537       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2538 
2539       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2540       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2541       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2542       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2543 
2544       /* upper triangular part: bb1 = slvec1a + B*slvec1b (slvec1b holds the negated ghost values of xx) */
2545       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2546 
2547       /* local diagonal sweep */
2548       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2549     }
2550     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2551   } else {
2552     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2553   }
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2559 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2560 {
2561   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2562   PetscErrorCode ierr;
2563   Vec            lvec1,bb1;
2564 
2565   PetscFunctionBegin;
2566   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2567   if (matin->rmap.bs > 1)
2568     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2569 
2570   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2571     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2572       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2573       its--;
2574     }
2575 
2576     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2577     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
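    /* same net update bb1 = bb - (off-block-diagonal part of A)*xx as in
       MatRelax_MPISBAIJ, but assembled with forward and reverse scatters on
       Mvctx instead of the single symmetric scatter */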
2578     while (its--){
2579       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2580 
2581       /* lower diagonal part: bb1 = bb - B^T*xx */
2582       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2583       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2584 
2585       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2586       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2587       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2588 
2589       /* upper diagonal part: bb1 = bb1 - B*x */
2590       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2591       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2592 
2593       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2594 
2595       /* diagonal sweep */
2596       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2597     }
2598     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2599     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2600   } else {
2601     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2602   }
2603   PetscFunctionReturn(0);
2604 }
2605 
2606