#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
#include "../src/mat/impls/sbaij/seq/sbaij.h"
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END


#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

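/*
   The two macros above share one insert pattern: within block row brow the
   block column indices rp[0..nrow-1] are kept sorted, so a lookup is a
   bisection narrowed to a small window followed by a linear scan, and an
   insertion shifts the tail of the row up by one slot.  A minimal sketch of
   that pattern (hypothetical helper; scalar entries instead of bs x bs
   blocks; no reallocation), kept out of the build:
*/
#if 0
static PetscErrorCode SearchAndInsertSketch(PetscInt *rp,MatScalar *ap,PetscInt *nrow,PetscInt bcol,MatScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i,ii;

  while (high-low > 3) {             /* bisection: narrow to a window of <= 3 */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {         /* linear scan of the small window */
    if (rp[i] > bcol) break;
    if (rp[i] == bcol) {             /* entry exists: add or overwrite */
      if (addv == ADD_VALUES) ap[i] += value;
      else                    ap[i]  = value;
      return 0;
    }
  }
  for (ii=*nrow-1; ii>=i; ii--) {    /* shift the tail up to open slot i */
    rp[ii+1] = rp[ii];
    ap[ii+1] = ap[ii];
  }
  rp[i] = bcol; ap[i] = value; (*nrow)++;
  return 0;
}
#endif
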
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  /* for stash */
  PetscInt       n_loc, *in_loc = PETSC_NULL;
  MatScalar      *v_loc = PETSC_NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash){
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor's entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs){
          if (a->ignore_ltriangular){
            continue;    /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off-processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

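/*
   A usage sketch for MatSetValues() on a MATMPISBAIJ matrix (hypothetical
   caller; block size 1; matrix already created and preallocated): only the
   upper triangle is inserted, and MAT_IGNORE_LOWER_TRIANGULAR is set so any
   stray lower-triangular entries are dropped instead of raising an error.
*/
#if 0
static PetscErrorCode SetTridiagonalUpperSketch(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,N,i,j;
  PetscScalar    diag = 4.0,offdiag = -1.0;

  PetscFunctionBegin;
  ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatGetSize(mat,&N,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatSetValues(mat,1,&i,1,&i,&diag,INSERT_VALUES);CHKERRQ(ierr);  /* diagonal */
    if (i+1 < N) {                                                         /* superdiagonal only */
      j = i+1;
      ierr = MatSetValues(mat,1,&i,1,&j,&offdiag,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
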
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If n == 1 (row-oriented) or m == 1 (column-oriented), the block is already contiguous and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

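/*
   A sketch of blocked insertion (hypothetical caller; assumes mat is
   MATMPISBAIJ with block size 2): v holds one 2x2 block, row oriented by
   default, and only block rows ib <= jb may be set.
*/
#if 0
static PetscErrorCode SetOneBlockSketch(Mat mat,PetscInt ib,PetscInt jb)
{
  PetscErrorCode ierr;
  PetscScalar    v[4] = {4.0,-1.0,
                         -1.0,4.0};   /* one 2x2 block */

  PetscFunctionBegin;
  if (ib > jb) PetscFunctionReturn(0); /* keep to the upper triangle */
  ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,v,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
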
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,&lnorm2[0]);CHKERRQ(ierr);
      lnorm2[0] = lnorm2[0]*lnorm2[0];                    /* square of norm(A) */
      ierr = MatNorm(baij->B,type,&lnorm2[1]);CHKERRQ(ierr);
      lnorm2[1] = lnorm2[1]*lnorm2[1];                    /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

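/*
   Note on the Frobenius reduction above: each process contributes the
   squared norm of its diagonal block A (the sequential SBAIJ norm already
   accounts for the symmetric entries of A) and the squared norm of its
   off-diagonal part B; every entry of B also represents its unstored
   transpose, hence the factor of 2 in

       norm = sqrt( sum_p ( ||A_p||_F^2 + 2*||B_p||_F^2 ) ).
*/
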
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the row-oriented flags, then restore the original flags
       after MatSetValuesBlocked() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     If the nonzero structure of submatrix B cannot change, then no processor
     can have disassembled, so we can skip this step.
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}

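/*
   A caller-side sketch of the assembly handshake implemented above
   (hypothetical ranks and indices): entries set for rows owned by another
   process wait in mat->stash (or mat->bstash for blocked insertion) until
   MatAssemblyBegin()/MatAssemblyEnd() scatter them to their owners.
*/
#if 0
static PetscErrorCode AssembleWithOffProcessEntrySketch(Mat mat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       row = 0,col = 0;   /* row 0 is owned by rank 0 */
  PetscScalar    v = 1.0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  if (rank == 1) {                  /* stashed here, assembled on rank 0 */
    ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);  /* starts the stash scatters */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);    /* drains both stashes, assembles A and B */
  PetscFunctionReturn(0);
}
#endif
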
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
  if (baij->bb1)  {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
  if (baij->xx1)  {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

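/*
   Both MatMult variants above use the single-scatter scheme: slvec0 packs
   the local x followed by B^T x (this process's contribution to rows owned
   by others); one forward ADD_VALUES scatter simultaneously adds those
   contributions into slvec1a (which already holds A_d x) and gathers the
   ghost values of x into slvec1b; the final multadd then computes

       y = B x_ghost + ( A_d x + received B^T x ),

   the full symmetric product even though only the upper triangle is stored.
*/
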
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

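/*
   A usage sketch for the row routines above (hypothetical caller): since
   SBAIJ stores only the upper triangle, row access must be bracketed by
   MatGetRowUpperTriangular()/MatRestoreRowUpperTriangular(), and only the
   stored part of each local row is returned.
*/
#if 0
static PetscErrorCode PrintLocalRowsSketch(Mat mat)
{
  PetscErrorCode    ierr;
  PetscInt          rstart,rend,row,nz,j;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
    for (j=0; j<nz; j++) {
      ierr = PetscPrintf(PETSC_COMM_SELF,"(%D,%D) %G\n",row,cols[j],PetscRealPart(vals[j]));CHKERRQ(ierr);
    }
    ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
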
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the matrix is symmetric, so its transpose is itself */
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr){
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

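/*
   Usage sketch for the scaling routine above (hypothetical caller): only
   symmetric scaling D*mat*D is possible for this format, so the same vector
   must appear on both sides.
*/
#if 0
static PetscErrorCode SymmetricScaleSketch(Mat mat,Vec d)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);  /* ll == rr, as required */
  PetscFunctionReturn(0);
}
#endif
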
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatCopy_MPISBAIJ"
1334 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1335 {
1336   PetscErrorCode ierr;
1337   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1338   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1339 
1340   PetscFunctionBegin;
1341   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1342   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1343     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1344     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1345     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1346   } else {
1347     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1348     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1349   }
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1355 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1356 {
1357   PetscErrorCode ierr;
1358 
1359   PetscFunctionBegin;
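  /* the negative block size asks the preallocation routine below to let the
     -mat_block_size option override the default */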
1360   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1366 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1367 {
1368   PetscErrorCode ierr;
1369   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1370   PetscBLASInt   bnz,one=1;
1371   Mat_SeqSBAIJ   *xa,*ya;
1372   Mat_SeqBAIJ    *xb,*yb;
1373 
1374   PetscFunctionBegin;
1375   if (str == SAME_NONZERO_PATTERN) {
1376     PetscScalar alpha = a;
1377     xa = (Mat_SeqSBAIJ *)xx->A->data;
1378     ya = (Mat_SeqSBAIJ *)yy->A->data;
1379     bnz = PetscBLASIntCast(xa->nz);
1380     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1381     xb = (Mat_SeqBAIJ *)xx->B->data;
1382     yb = (Mat_SeqBAIJ *)yy->B->data;
1383     bnz = PetscBLASIntCast(xb->nz);
1384     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1385   } else {
1386     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1387     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1388     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1389   }
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1395 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1396 {
1397   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1398   PetscInt        rbs,cbs;
1399   PetscErrorCode  ierr;
1400 
1401   PetscFunctionBegin;
1402   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1403   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1404   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1405   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1406   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot set block size %D; SBAIJ row block size is already %D",bs,rbs);
1407   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot set block size %D; SBAIJ column block size is already %D",bs,cbs);
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1413 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1414 {
1415   PetscErrorCode ierr;
1416   PetscInt       i;
1417   PetscTruth     flg;
1418 
1419   PetscFunctionBegin;
1420   for (i=0; i<n; i++) {
1421     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1422     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1423   }
1424   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 
1429 /* -------------------------------------------------------------------*/
1430 static struct _MatOps MatOps_Values = {
1431        MatSetValues_MPISBAIJ,
1432        MatGetRow_MPISBAIJ,
1433        MatRestoreRow_MPISBAIJ,
1434        MatMult_MPISBAIJ,
1435 /* 4*/ MatMultAdd_MPISBAIJ,
1436        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1437        MatMultAdd_MPISBAIJ,
1438        0,
1439        0,
1440        0,
1441 /*10*/ 0,
1442        0,
1443        0,
1444        MatSOR_MPISBAIJ,
1445        MatTranspose_MPISBAIJ,
1446 /*15*/ MatGetInfo_MPISBAIJ,
1447        MatEqual_MPISBAIJ,
1448        MatGetDiagonal_MPISBAIJ,
1449        MatDiagonalScale_MPISBAIJ,
1450        MatNorm_MPISBAIJ,
1451 /*20*/ MatAssemblyBegin_MPISBAIJ,
1452        MatAssemblyEnd_MPISBAIJ,
1453        MatSetOption_MPISBAIJ,
1454        MatZeroEntries_MPISBAIJ,
1455 /*24*/ 0,
1456        0,
1457        0,
1458        0,
1459        0,
1460 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1461        0,
1462        0,
1463        0,
1464        0,
1465 /*34*/ MatDuplicate_MPISBAIJ,
1466        0,
1467        0,
1468        0,
1469        0,
1470 /*39*/ MatAXPY_MPISBAIJ,
1471        MatGetSubMatrices_MPISBAIJ,
1472        MatIncreaseOverlap_MPISBAIJ,
1473        MatGetValues_MPISBAIJ,
1474        MatCopy_MPISBAIJ,
1475 /*44*/ 0,
1476        MatScale_MPISBAIJ,
1477        0,
1478        0,
1479        0,
1480 /*49*/ MatSetBlockSize_MPISBAIJ,
1481        0,
1482        0,
1483        0,
1484        0,
1485 /*54*/ 0,
1486        0,
1487        MatSetUnfactored_MPISBAIJ,
1488        0,
1489        MatSetValuesBlocked_MPISBAIJ,
1490 /*59*/ 0,
1491        0,
1492        0,
1493        0,
1494        0,
1495 /*64*/ 0,
1496        0,
1497        0,
1498        0,
1499        0,
1500 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1501        0,
1502        0,
1503        0,
1504        0,
1505 /*74*/ 0,
1506        0,
1507        0,
1508        0,
1509        0,
1510 /*79*/ 0,
1511        0,
1512        0,
1513        0,
1514        0,
1515 /*84*/ 0,
1516        0,
1517        0,
1518        0,
1519        0,
1520 /*89*/ 0,
1521        0,
1522        0,
1523        0,
1524        0,
1525 /*94*/ 0,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*99*/ 0,
1531        0,
1532        0,
1533        0,
1534        0,
1535 /*104*/0,
1536        MatRealPart_MPISBAIJ,
1537        MatImaginaryPart_MPISBAIJ,
1538        MatGetRowUpperTriangular_MPISBAIJ,
1539        MatRestoreRowUpperTriangular_MPISBAIJ,
1540 /*109*/0,
1541        0,
1542        0,
1543        0,
1544        0,
1545 /*114*/0,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*119*/0,
1551        0,
1552        0,
1553        0,
1554        MatLoadnew_MPISBAIJ
1555 };
1556 
1557 
1558 EXTERN_C_BEGIN
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1561 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1562 {
1563   PetscFunctionBegin;
1564   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1565   *iscopy = PETSC_FALSE;
1566   PetscFunctionReturn(0);
1567 }
1568 EXTERN_C_END
1569 
1570 EXTERN_C_BEGIN
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1573 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1574 {
1575   Mat_MPISBAIJ   *b;
1576   PetscErrorCode ierr;
1577   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1578 
1579   PetscFunctionBegin;
1580   if (bs < 0){
1581     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1582       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1583     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1584     bs   = PetscAbs(bs);
1585   }
1586   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1587   bs = newbs;
1588 
1589   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1590   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1591   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1592   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1593 
1594   B->rmap->bs = B->cmap->bs = bs;
1595   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1596   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1597 
1598   if (d_nnz) {
1599     for (i=0; i<B->rmap->n/bs; i++) {
1600       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1601     }
1602   }
1603   if (o_nnz) {
1604     for (i=0; i<B->rmap->n/bs; i++) {
1605       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1606     }
1607   }
1608 
1609   b   = (Mat_MPISBAIJ*)B->data;
1610   mbs = B->rmap->n/bs;
1611   Mbs = B->rmap->N/bs;
1612   if (mbs*bs != B->rmap->n) {
1613     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1614   }
1615 
1616   B->rmap->bs  = bs;
1617   b->bs2 = bs*bs;
1618   b->mbs = mbs;
1619   b->nbs = mbs;
1620   b->Mbs = Mbs;
1621   b->Nbs = Mbs;
1622 
1623   for (i=0; i<=b->size; i++) {
1624     b->rangebs[i] = B->rmap->range[i]/bs;
1625   }
1626   b->rstartbs = B->rmap->rstart/bs;
1627   b->rendbs   = B->rmap->rend/bs;
1628 
1629   b->cstartbs = B->cmap->rstart/bs;
1630   b->cendbs   = B->cmap->rend/bs;
1631 
1632   if (!B->preallocated) {
1633     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1634     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1635     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1636     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1637     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1638     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1639     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1640     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1641     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1642   }
1643 
1644   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1645   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1646   B->preallocated = PETSC_TRUE;
1647   PetscFunctionReturn(0);
1648 }
1649 EXTERN_C_END
1650 
1651 EXTERN_C_BEGIN
1652 #if defined(PETSC_HAVE_MUMPS)
1653 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1654 #endif
1655 #if defined(PETSC_HAVE_SPOOLES)
1656 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1657 #endif
1658 #if defined(PETSC_HAVE_PASTIX)
1659 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1660 #endif
1661 EXTERN_C_END
1662 
1663 /*MC
1664    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1665    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1666 
1667    Options Database Keys:
1668 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1669 
1670   Level: beginner
1671 
1672 .seealso: MatCreateMPISBAIJ
1673 M*/
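
/*
   A minimal creation sketch for MATMPISBAIJ (the block size 2 and the nonzero
   estimates below are illustrative assumptions, not requirements):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... set entries with column >= row only, then MatAssemblyBegin/End ...

   Because only the upper triangle is stored, lower triangular entries should
   not be set (or MAT_IGNORE_LOWER_TRIANGULAR should be enabled).
*/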
1674 
1675 EXTERN_C_BEGIN
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatCreate_MPISBAIJ"
1678 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1679 {
1680   Mat_MPISBAIJ   *b;
1681   PetscErrorCode ierr;
1682   PetscTruth     flg;
1683 
1684   PetscFunctionBegin;
1685 
1686   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1687   B->data = (void*)b;
1688   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1689 
1690   B->ops->destroy    = MatDestroy_MPISBAIJ;
1691   B->ops->view       = MatView_MPISBAIJ;
1692   B->mapping         = 0;
1693   B->assembled       = PETSC_FALSE;
1694 
1695   B->insertmode = NOT_SET_VALUES;
1696   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1697   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1698 
1699   /* build local table of row and column ownerships */
1700   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1701 
1702   /* build cache for off array entries formed */
1703   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1704   b->donotstash  = PETSC_FALSE;
1705   b->colmap      = PETSC_NULL;
1706   b->garray      = PETSC_NULL;
1707   b->roworiented = PETSC_TRUE;
1708 
1709   /* stuff used in block assembly */
1710   b->barray       = 0;
1711 
1712   /* stuff used for matrix vector multiply */
1713   b->lvec         = 0;
1714   b->Mvctx        = 0;
1715   b->slvec0       = 0;
1716   b->slvec0b      = 0;
1717   b->slvec1       = 0;
1718   b->slvec1a      = 0;
1719   b->slvec1b      = 0;
1720   b->sMvctx       = 0;
1721 
1722   /* stuff for MatGetRow() */
1723   b->rowindices   = 0;
1724   b->rowvalues    = 0;
1725   b->getrowactive = PETSC_FALSE;
1726 
1727   /* hash table stuff */
1728   b->ht           = 0;
1729   b->hd           = 0;
1730   b->ht_size      = 0;
1731   b->ht_flag      = PETSC_FALSE;
1732   b->ht_fact      = 0;
1733   b->ht_total_ct  = 0;
1734   b->ht_insert_ct = 0;
1735 
1736   b->in_loc       = 0;
1737   b->v_loc        = 0;
1738   b->n_loc        = 0;
1739   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1740     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1741     if (flg) {
1742       PetscReal fact = 1.39;
1743       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1744       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1745       if (fact <= 1.0) fact = 1.39;
1746       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1747       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1748     }
1749   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1750 
1751 #if defined(PETSC_HAVE_PASTIX)
1752   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1753 					   "MatGetFactor_mpisbaij_pastix",
1754 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1755 #endif
1756 #if defined(PETSC_HAVE_MUMPS)
1757   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1758                                      "MatGetFactor_sbaij_mumps",
1759                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1760 #endif
1761 #if defined(PETSC_HAVE_SPOOLES)
1762   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1763                                      "MatGetFactor_mpisbaij_spooles",
1764                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1765 #endif
1766   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1767                                      "MatStoreValues_MPISBAIJ",
1768                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1769   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1770                                      "MatRetrieveValues_MPISBAIJ",
1771                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1772   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1773                                      "MatGetDiagonalBlock_MPISBAIJ",
1774                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1775   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1776                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1777                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1778   B->symmetric                  = PETSC_TRUE;
1779   B->structurally_symmetric     = PETSC_TRUE;
1780   B->symmetric_set              = PETSC_TRUE;
1781   B->structurally_symmetric_set = PETSC_TRUE;
1782   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
1785 EXTERN_C_END
1786 
1787 /*MC
1788    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1789 
1790    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1791    and MATMPISBAIJ otherwise.
1792 
1793    Options Database Keys:
1794 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1795 
1796   Level: beginner
1797 
1798 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1799 M*/
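
/*
   A minimal sketch of run-time type selection (the option value comes from the
   documentation above):

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);    - honors -mat_type sbaij

   On a one-process communicator this resolves to MATSEQSBAIJ, otherwise to
   MATMPISBAIJ, as implemented in MatCreate_SBAIJ() below.
*/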
1800 
1801 EXTERN_C_BEGIN
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatCreate_SBAIJ"
1804 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1805 {
1806   PetscErrorCode ierr;
1807   PetscMPIInt    size;
1808 
1809   PetscFunctionBegin;
1810   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1811   if (size == 1) {
1812     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1813   } else {
1814     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1815   }
1816   PetscFunctionReturn(0);
1817 }
1818 EXTERN_C_END
1819 
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1822 /*@C
1823    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1824    the user should preallocate the matrix storage by setting the parameters
1825    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1826    performance can be increased by more than a factor of 50.
1827 
1828    Collective on Mat
1829 
1830    Input Parameters:
1831 +  A - the matrix
1832 .  bs   - size of block
1833 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1834            submatrix  (same for all local rows)
1835 .  d_nnz - array containing the number of block nonzeros in the various block rows
1836            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1837            (possibly different for each block row) or PETSC_NULL.  You must leave room
1838            for the diagonal entry even if it is zero.
1839 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1840            submatrix (same for all local rows).
1841 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1842            off-diagonal portion of the local submatrix (possibly different for
1843            each block row) or PETSC_NULL.
1844 
1845 
1846    Options Database Keys:
1847 +   -mat_no_unroll - uses code that does not unroll the loops in the
1848                      block calculations (much slower)
1849 -   -mat_block_size - size of the blocks to use
1850 
1851    Notes:
1852 
1853    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1854    then it must be used on all processors that share the object for that argument.
1855 
1856    If the *_nnz parameter is given then the *_nz parameter is ignored
1857 
1858    Storage Information:
1859    For a square global matrix we define each processor's diagonal portion
1860    to be its local rows and the corresponding columns (a square submatrix);
1861    each processor's off-diagonal portion encompasses the remainder of the
1862    local matrix (a rectangular submatrix).
1863 
1864    The user can specify preallocated storage for the diagonal part of
1865    the local submatrix with either d_nz or d_nnz (not both).  Set
1866    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1867    memory allocation.  Likewise, specify preallocated storage for the
1868    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1869 
1870    You can call MatGetInfo() to get information on how effective the preallocation was,
1871    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1872    You can also run with the option -info and look for messages containing the string
1873    malloc to see if additional memory allocation was needed.
1874 
1875    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1876    the figure below we depict these three local rows and all columns (0-11).
1877 
1878 .vb
1879            0 1 2 3 4 5 6 7 8 9 10 11
1880           -------------------
1881    row 3  |  o o o d d d o o o o o o
1882    row 4  |  o o o d d d o o o o o o
1883    row 5  |  o o o d d d o o o o o o
1884           -------------------
1885 .ve
1886 
1887    Thus, any entries in the d locations are stored in the d (diagonal)
1888    submatrix, and any entries in the o locations are stored in the
1889    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1890    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1891 
1892    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1893    plus the diagonal part of the d matrix,
1894    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1895    In general, for PDE problems in which most nonzeros are near the diagonal,
1896    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1897    or you will get TERRIBLE performance; see the users' manual chapter on
1898    matrices.
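
   Example:
   A minimal sketch for the 3-row layout above with bs=1, assuming every d and o
   location shown is a nonzero (only columns at or right of the diagonal are stored):
.vb
   PetscInt d_nnz[] = {3,2,1},o_nnz[] = {6,6,6};
   ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   Row 3 keeps block columns 3-5, row 4 columns 4-5, row 5 column 5; columns
   6-11 of each row land in the o submatrix.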
1899 
1900    Level: intermediate
1901 
1902 .keywords: matrix, block, aij, compressed row, sparse, parallel
1903 
1904 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1905 @*/
1906 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1907 {
1908   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1909 
1910   PetscFunctionBegin;
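  /* dispatch to the type-specific implementation composed on the object in
     MatCreate_MPISBAIJ(); silently a no-op if the type provides none */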
1911   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1912   if (f) {
1913     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1914   }
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatCreateMPISBAIJ"
1920 /*@C
1921    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1922    (block compressed row).  For good matrix assembly performance
1923    the user should preallocate the matrix storage by setting the parameters
1924    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1925    performance can be increased by more than a factor of 50.
1926 
1927    Collective on MPI_Comm
1928 
1929    Input Parameters:
1930 +  comm - MPI communicator
1931 .  bs   - size of block
1932 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1933            This value should be the same as the local size used in creating the
1934            y vector for the matrix-vector product y = Ax.
1935 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1936            This value should be the same as the local size used in creating the
1937            x vector for the matrix-vector product y = Ax.
1938 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1939 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1940 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1941            submatrix  (same for all local rows)
1942 .  d_nnz - array containing the number of block nonzeros in the various block rows
1943            in the upper triangular portion of the diagonal portion of the local submatrix
1944            (possibly different for each block row) or PETSC_NULL.
1945            You must leave room for the diagonal entry even if it is zero.
1946 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1947            submatrix (same for all local rows).
1948 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1949            off-diagonal portion of the local submatrix (possibly different for
1950            each block row) or PETSC_NULL.
1951 
1952    Output Parameter:
1953 .  A - the matrix
1954 
1955    Options Database Keys:
1956 +   -mat_no_unroll - uses code that does not unroll the loops in the
1957                      block calculations (much slower)
1958 .   -mat_block_size - size of the blocks to use
1959 -   -mat_mpi - use the parallel matrix data structures even on one processor
1960                (defaults to using SeqBAIJ format on one processor)
1961 
1962    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1963    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1964    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1965 
1966    Notes:
1967    The number of rows and columns must be divisible by blocksize.
1968    This matrix type does not support complex Hermitian operations.
1969 
1970    The user MUST specify either the local or global matrix dimensions
1971    (possibly both).
1972 
1973    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1974    then it must be used on all processors that share the object for that argument.
1975 
1976    If the *_nnz parameter is given then the *_nz parameter is ignored
1977 
1978    Storage Information:
1979    For a square global matrix we define each processor's diagonal portion
1980    to be its local rows and the corresponding columns (a square submatrix);
1981    each processor's off-diagonal portion encompasses the remainder of the
1982    local matrix (a rectangular submatrix).
1983 
1984    The user can specify preallocated storage for the diagonal part of
1985    the local submatrix with either d_nz or d_nnz (not both).  Set
1986    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1987    memory allocation.  Likewise, specify preallocated storage for the
1988    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1989 
1990    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1991    the figure below we depict these three local rows and all columns (0-11).
1992 
1993 .vb
1994            0 1 2 3 4 5 6 7 8 9 10 11
1995           -------------------
1996    row 3  |  o o o d d d o o o o o o
1997    row 4  |  o o o d d d o o o o o o
1998    row 5  |  o o o d d d o o o o o o
1999           -------------------
2000 .ve
2001 
2002    Thus, any entries in the d locations are stored in the d (diagonal)
2003    submatrix, and any entries in the o locations are stored in the
2004    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2005    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2006 
2007    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2008    plus the diagonal part of the d matrix,
2009    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2010    In general, for PDE problems in which most nonzeros are near the diagonal,
2011    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2012    or you will get TERRIBLE performance; see the users' manual chapter on
2013    matrices.
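
   Example:
   A minimal sketch for the 3-row layout above (bs=1, 12x12 global; the counts are
   read off the figure assuming every d and o location is a nonzero):
.vb
   ierr = MatCreateMPISBAIJ(comm,1,3,3,12,12,3,PETSC_NULL,6,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
   Here 3 covers the worst upper triangular row of the d part and 6 the o part;
   the per-row arrays d_nnz/o_nnz would give tighter bounds.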
2014 
2015    Level: intermediate
2016 
2017 .keywords: matrix, block, aij, compressed row, sparse, parallel
2018 
2019 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2020 @*/
2021 
2022 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2023 {
2024   PetscErrorCode ierr;
2025   PetscMPIInt    size;
2026 
2027   PetscFunctionBegin;
2028   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2029   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2030   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2031   if (size > 1) {
2032     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2033     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2034   } else {
2035     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2036     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2037   }
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2044 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2045 {
2046   Mat            mat;
2047   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2048   PetscErrorCode ierr;
2049   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2050   PetscScalar    *array;
2051 
2052   PetscFunctionBegin;
2053   *newmat       = 0;
2054   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2055   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2056   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2057   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2058   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2059   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2060 
2061   mat->factortype   = matin->factortype;
2062   mat->preallocated = PETSC_TRUE;
2063   mat->assembled    = PETSC_TRUE;
2064   mat->insertmode   = NOT_SET_VALUES;
2065 
2066   a = (Mat_MPISBAIJ*)mat->data;
2067   a->bs2   = oldmat->bs2;
2068   a->mbs   = oldmat->mbs;
2069   a->nbs   = oldmat->nbs;
2070   a->Mbs   = oldmat->Mbs;
2071   a->Nbs   = oldmat->Nbs;
2072 
2073 
2074   a->size         = oldmat->size;
2075   a->rank         = oldmat->rank;
2076   a->donotstash   = oldmat->donotstash;
2077   a->roworiented  = oldmat->roworiented;
2078   a->rowindices   = 0;
2079   a->rowvalues    = 0;
2080   a->getrowactive = PETSC_FALSE;
2081   a->barray       = 0;
2082   a->rstartbs    = oldmat->rstartbs;
2083   a->rendbs      = oldmat->rendbs;
2084   a->cstartbs    = oldmat->cstartbs;
2085   a->cendbs      = oldmat->cendbs;
2086 
2087   /* hash table stuff */
2088   a->ht           = 0;
2089   a->hd           = 0;
2090   a->ht_size      = 0;
2091   a->ht_flag      = oldmat->ht_flag;
2092   a->ht_fact      = oldmat->ht_fact;
2093   a->ht_total_ct  = 0;
2094   a->ht_insert_ct = 0;
2095 
2096   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2097   if (oldmat->colmap) {
2098 #if defined (PETSC_USE_CTABLE)
2099     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2100 #else
2101     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2102     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2103     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2104 #endif
2105   } else a->colmap = 0;
2106 
2107   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2108     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2109     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2110     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2111   } else a->garray = 0;
2112 
2113   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2114   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2115   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2116   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2117   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2118 
2119   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2120   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2121   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2122   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2123 
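  /* slvec1 holds the local part (first bs*mbs entries) followed by the ghost
     part; slvec1a/slvec1b below are sub-vector views sharing that array */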
2124   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2125   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2126   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2127   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2128   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2129   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2130   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2131   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2132   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2133   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2134   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2135   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2136   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2137 
2138   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2139   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2140   a->sMvctx = oldmat->sMvctx;
2141   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2142 
2143   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2144   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2145   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2146   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2147   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2148   *newmat = mat;
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatLoad_MPISBAIJ"
2154 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2155 {
2156   Mat            A;
2157   PetscErrorCode ierr;
2158   PetscInt       i,nz,j,rstart,rend;
2159   PetscScalar    *vals,*buf;
2160   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2161   MPI_Status     status;
2162   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2163   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2164   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2165   PetscInt       bs=1,Mbs,mbs,extra_rows;
2166   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2167   PetscInt       dcount,kmax,k,nzcount,tmp;
2168   int            fd;
2169 
2170   PetscFunctionBegin;
2171   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2172     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2173   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2174 
2175   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2176   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
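  /* binary header layout read below: header[0]=classid, header[1]=M rows,
     header[2]=N cols, header[3]=nonzero count (negative flags a special format) */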
2177   if (!rank) {
2178     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2179     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2180     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2181     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2182   }
2183 
2184   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2185   M = header[1]; N = header[2];
2186 
2187   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2188 
2189   /*
2190      This code adds extra rows to make sure the number of rows is
2191      divisible by the blocksize
2192   */
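  /* e.g. bs=4, M=10: Mbs=10/4=2, extra_rows=4-10+8=2, so Mbs becomes 3 and the
     matrix is padded to 12 rows; the padded rows get 1.0 on the diagonal below */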
2193   Mbs        = M/bs;
2194   extra_rows = bs - M + bs*(Mbs);
2195   if (extra_rows == bs) extra_rows = 0;
2196   else                  Mbs++;
2197   if (extra_rows && !rank) {
2198     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2199   }
2200 
2201   /* determine ownership of all rows */
2202   mbs        = Mbs/size + ((Mbs % size) > rank);
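  /* e.g. Mbs=10, size=3: ranks 0,1,2 own 4,3,3 block rows respectively */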
2203   m          = mbs*bs;
2204   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2205   mmbs       = PetscMPIIntCast(mbs);
2206   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2207   rowners[0] = 0;
2208   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2209   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2210   rstart = rowners[rank];
2211   rend   = rowners[rank+1];
2212 
2213   /* distribute row lengths to all processors */
2214   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2215   if (!rank) {
2216     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2217     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2218     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2219     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2220     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2221     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2222     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2223   } else {
2224     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2225   }
2226 
2227   if (!rank) {   /* procs[0] */
2228     /* calculate the number of nonzeros on each processor */
2229     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2230     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2231     for (i=0; i<size; i++) {
2232       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2233         procsnz[i] += rowlengths[j];
2234       }
2235     }
2236     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2237 
2238     /* determine max buffer needed and allocate it */
2239     maxnz = 0;
2240     for (i=0; i<size; i++) {
2241       maxnz = PetscMax(maxnz,procsnz[i]);
2242     }
2243     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2244 
2245     /* read in my part of the matrix column indices  */
2246     nz     = procsnz[0];
2247     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2248     mycols = ibuf;
2249     if (size == 1)  nz -= extra_rows;
2250     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2251     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2252 
2253     /* read in each remaining processor's part (except the last) and ship it off */
2254     for (i=1; i<size-1; i++) {
2255       nz   = procsnz[i];
2256       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2257       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2258     }
2259     /* read in the part for the last proc */
2260     if (size != 1) {
2261       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2262       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2263       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2264       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2265     }
2266     ierr = PetscFree(cols);CHKERRQ(ierr);
2267   } else {  /* procs[i], i>0 */
2268     /* determine buffer space needed for message */
2269     nz = 0;
2270     for (i=0; i<m; i++) {
2271       nz += locrowlens[i];
2272     }
2273     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2274     mycols = ibuf;
2275     /* receive message of column indices*/
2276     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2277     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2278     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2279   }
2280 
2281   /* loop over local rows, determining number of off diagonal entries */
2282   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2283   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2284   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2285   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2286   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2287   rowcount = 0;
2288   nzcount  = 0;
2289   for (i=0; i<mbs; i++) {
2290     dcount  = 0;
2291     odcount = 0;
2292     for (j=0; j<bs; j++) {
2293       kmax = locrowlens[rowcount];
2294       for (k=0; k<kmax; k++) {
2295         tmp = mycols[nzcount++]/bs; /* block col. index */
2296         if (!mask[tmp]) {
2297           mask[tmp] = 1;
2298           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2299           else masked1[dcount++] = tmp; /* entry in diag portion */
2300         }
2301       }
2302       rowcount++;
2303     }
2304 
2305     dlens[i]  = dcount;  /* d_nzz[i] */
2306     odlens[i] = odcount; /* o_nzz[i] */
2307 
2308     /* zero out the mask elements we set */
2309     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2310     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2311   }
2312 
2313   /* create our matrix */
2314   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2315   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2316   ierr = MatSetType(A,type);CHKERRQ(ierr);
2317   ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2318   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2319 
2320   if (!rank) {
2321     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2322     /* read in my part of the matrix numerical values  */
2323     nz = procsnz[0];
2324     vals = buf;
2325     mycols = ibuf;
2326     if (size == 1)  nz -= extra_rows;
2327     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2328     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2329 
2330     /* insert into matrix */
2331     jj      = rstart*bs;
2332     for (i=0; i<m; i++) {
2333       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2334       mycols += locrowlens[i];
2335       vals   += locrowlens[i];
2336       jj++;
2337     }
2338 
2339     /* read in other processors (except the last one) and ship out */
2340     for (i=1; i<size-1; i++) {
2341       nz   = procsnz[i];
2342       vals = buf;
2343       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2344       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2345     }
2346     /* the last proc */
2347     if (size != 1){
2348       nz   = procsnz[size-1] - extra_rows;
2349       vals = buf;
2350       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2351       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2352       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2353     }
2354     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2355 
2356   } else {
2357     /* receive numeric values */
2358     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2359 
2360     /* receive message of values*/
2361     vals   = buf;
2362     mycols = ibuf;
2363     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2364     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2365     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2366 
2367     /* insert into matrix */
2368     jj      = rstart*bs;
2369     for (i=0; i<m; i++) {
2370       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2371       mycols += locrowlens[i];
2372       vals   += locrowlens[i];
2373       jj++;
2374     }
2375   }
2376 
2377   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2378   ierr = PetscFree(buf);CHKERRQ(ierr);
2379   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2380   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2381   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2382   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2383   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2384   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2385   *newmat = A;
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatLoadnew_MPISBAIJ"
2391 PetscErrorCode MatLoadnew_MPISBAIJ(PetscViewer viewer, Mat newmat)
2392 {
2393   PetscErrorCode ierr;
2394   PetscInt       i,nz,j,rstart,rend;
2395   PetscScalar    *vals,*buf;
2396   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2397   MPI_Status     status;
2398   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2399   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2400   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2401   PetscInt       bs=1,Mbs,mbs,extra_rows;
2402   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2403   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2404   int            fd;
2405 
2406   PetscFunctionBegin;
2407   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2408     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2409   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2410 
2411   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2412   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2413   if (!rank) {
2414     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2415     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2416     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2417     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2418   }
2419 
2420   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2421 
2422   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2423   M = header[1]; N = header[2];
2424 
2425   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2426   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2427   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2428 
2429   /* If global sizes are set, check if they are consistent with that given in the file */
2430   if (sizesset) {
2431     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2432   }
2433   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has %d, input matrix has %d",M,grows);
2434   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has %d, input matrix has %d",N,gcols);
2435 
2436   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2437 
2438   /*
2439      This code adds extra rows to make sure the number of rows is
2440      divisible by the blocksize
2441   */
2442   Mbs        = M/bs;
2443   extra_rows = bs - M + bs*(Mbs);
2444   if (extra_rows == bs) extra_rows = 0;
2445   else                  Mbs++;
2446   if (extra_rows && !rank) {
2447     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2448   }
2449 
2450   /* determine ownership of all rows */
2451   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2452     mbs        = Mbs/size + ((Mbs % size) > rank);
2453     m          = mbs*bs;
2454   } else { /* User Set */
2455     m          = newmat->rmap->n;
2456     mbs        = m/bs;
2457   }
2458   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2459   mmbs       = PetscMPIIntCast(mbs);
2460   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2461   rowners[0] = 0;
2462   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2463   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2464   rstart = rowners[rank];
2465   rend   = rowners[rank+1];
2466 
2467   /* distribute row lengths to all processors */
2468   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2469   if (!rank) {
2470     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2471     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2472     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2473     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2474     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2475     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2476     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2477   } else {
2478     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2479   }
2480 
2481   if (!rank) {   /* procs[0] */
2482     /* calculate the number of nonzeros on each processor */
2483     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2484     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2485     for (i=0; i<size; i++) {
2486       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2487         procsnz[i] += rowlengths[j];
2488       }
2489     }
2490     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2491 
2492     /* determine max buffer needed and allocate it */
2493     maxnz = 0;
2494     for (i=0; i<size; i++) {
2495       maxnz = PetscMax(maxnz,procsnz[i]);
2496     }
2497     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2498 
2499     /* read in my part of the matrix column indices  */
2500     nz     = procsnz[0];
2501     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2502     mycols = ibuf;
2503     if (size == 1)  nz -= extra_rows;
2504     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2505     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2506 
2507     /* read in each remaining processor's part (except the last) and ship it off */
2508     for (i=1; i<size-1; i++) {
2509       nz   = procsnz[i];
2510       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2511       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2512     }
2513     /* read in the part for the last proc */
2514     if (size != 1) {
2515       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2516       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2517       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2518       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2519     }
2520     ierr = PetscFree(cols);CHKERRQ(ierr);
2521   } else {  /* procs[i], i>0 */
2522     /* determine buffer space needed for message */
2523     nz = 0;
2524     for (i=0; i<m; i++) {
2525       nz += locrowlens[i];
2526     }
2527     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2528     mycols = ibuf;
2529     /* receive message of column indices*/
2530     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2531     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2532     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2533   }
2534 
2535   /* loop over local rows, determining number of off diagonal entries */
2536   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2537   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2538   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2539   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2540   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2541   rowcount = 0;
2542   nzcount  = 0;
2543   for (i=0; i<mbs; i++) {
2544     dcount  = 0;
2545     odcount = 0;
2546     for (j=0; j<bs; j++) {
2547       kmax = locrowlens[rowcount];
2548       for (k=0; k<kmax; k++) {
2549         tmp = mycols[nzcount++]/bs; /* block col. index */
2550         if (!mask[tmp]) {
2551           mask[tmp] = 1;
2552           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2553           else masked1[dcount++] = tmp; /* entry in diag portion */
2554         }
2555       }
2556       rowcount++;
2557     }
2558 
2559     dlens[i]  = dcount;  /* d_nzz[i] */
2560     odlens[i] = odcount; /* o_nzz[i] */
2561 
2562     /* zero out the mask elements we set */
2563     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2564     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2565   }
2566   if (!sizesset) {
2567     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2568   }
2569   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2570   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2571 
2572   if (!rank) {
2573     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2574     /* read in my part of the matrix numerical values  */
2575     nz = procsnz[0];
2576     vals = buf;
2577     mycols = ibuf;
2578     if (size == 1)  nz -= extra_rows;
2579     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2580     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2581 
2582     /* insert into matrix */
2583     jj      = rstart*bs;
2584     for (i=0; i<m; i++) {
2585       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2586       mycols += locrowlens[i];
2587       vals   += locrowlens[i];
2588       jj++;
2589     }
2590 
2591     /* read in other processors (except the last one) and ship out */
2592     for (i=1; i<size-1; i++) {
2593       nz   = procsnz[i];
2594       vals = buf;
2595       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2596       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2597     }
2598     /* the last proc */
2599     if (size != 1){
2600       nz   = procsnz[size-1] - extra_rows;
2601       vals = buf;
2602       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2603       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2604       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2605     }
2606     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2607 
2608   } else {
2609     /* receive numeric values */
2610     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2611 
2612     /* receive message of values*/
2613     vals   = buf;
2614     mycols = ibuf;
2615     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2616     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2617     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2618 
2619     /* insert into matrix */
2620     jj      = rstart*bs;
2621     for (i=0; i<m; i++) {
2622       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2623       mycols += locrowlens[i];
2624       vals   += locrowlens[i];
2625       jj++;
2626     }
2627   }
2628 
2629   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2630   ierr = PetscFree(buf);CHKERRQ(ierr);
2631   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2632   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2633   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2634   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2635   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2636   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2637   PetscFunctionReturn(0);
2638 }
2639 
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.

   Collective on Mat

   Input Parameters:
+  mat  - the matrix
-  fact - the hash table factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/
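/*
   A hedged sketch: since the interface above is currently disabled, the
   hash table factor is supplied through the options database; the value
   1.6 below is purely illustrative:

     ierr = PetscOptionsSetValue("-mat_use_hash_table","1.6");CHKERRQ(ierr);

   or equivalently on the command line: -mat_use_hash_table 1.6
*/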


#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);

  bs   = A->rmap->bs;
  mbs  = a->mbs;
  Mbs  = a->Mbs;
  ba   = b->a;
  bi   = b->i;
  bj   = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
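  /* work[] accumulates, for every global row index, the largest magnitude
     this process sees among its B entries; because only the upper triangle
     is stored, an entry of B in column col also belongs to global row col,
     which is owned by a process of higher rank (hence the sends below),
     and the last process has no B entries at all */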

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;             /* local col index */
          col += rowners_bs[rank+1];      /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;           /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send each slice of the accumulated maxima to the process that owns those rows */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
    }
  }

  /* receive values */
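  /* every process with rank > 0 receives one contribution from each lower
     rank; MPI_ANY_SOURCE is safe here because taking the elementwise
     maximum is order-independent */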
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ"
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
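    /* each pass below computes bb1 = bb - B*xx - B^T*xx, i.e. the right-hand
       side with the off-diagonal contributions of the current iterate
       removed, and then runs a symmetric sweep with the local diagonal
       block A */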
    while (its--) {

      /* lower triangular part: slvec0b = B^T*xx (negated by the VecScale below) */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* off-diagonal part: bb1 = slvec1a + B*slvec1b = bb - B^T*xx - B*xx */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* symmetric sweep with the local diagonal block */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscTruth        hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

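    /* Eisenstat's trick: apply SSOR as one backward sweep, a diagonal
       correction with scale = (omega-2)/omega, and one forward sweep, so
       the triangular sweeps double as the matrix-vector products */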
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* note: the cached diagonal is not recomputed if the same matrix is later given new nonzero values */
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
         the two calls
           ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
           ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
         are replaced by the hand-written loops below, which a good compiler
         may execute a bit faster
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
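      /* when omega == 1.0, scale = -1, so the multiplication by scale can
         be folded into a subtraction */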
      if (omega == 1.0) {
        for (i=0; i<n; i++) {
          sl[i] = b[i] - diag[i]*x[i];
        }
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) {
          sl[i] = b[i] + scale*diag[i]*x[i];
        }
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply the off-diagonal portion of the matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

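  /* variant of MatSOR_MPISBAIJ that uses the conventional scatter Mvctx
     (one forward scatter for xx plus one reverse scatter for the B^T
     contribution) instead of the combined slvec0/slvec1 scatter */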
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*xx */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* symmetric sweep with the local diagonal block */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}