xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision e4c476e2c7168ebdecf8eb81d2eb3cf049d1c626)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
5 #include "../src/mat/impls/sbaij/seq/sbaij.h"
6 #include "petscblaslapack.h"
7 
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
11 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
12 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
14 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
17 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec);
21 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
22 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 EXTERN_C_BEGIN
25 #undef __FUNCT__
26 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
27 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
28 {
29   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
30   PetscErrorCode ierr;
31 
32   PetscFunctionBegin;
33   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
34   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
35   PetscFunctionReturn(0);
36 }
37 EXTERN_C_END
38 
39 EXTERN_C_BEGIN
40 #undef __FUNCT__
41 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
42 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
43 {
44   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
45   PetscErrorCode ierr;
46 
47   PetscFunctionBegin;
48   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
49   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 EXTERN_C_END
53 
54 
/*
   MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) - inserts or adds one
   scalar into the local diagonal block A at point indices (row,col).

   This macro expands inside MatSetValues_MPISBAIJ() and relies on locals
   declared there: a, aa, ai, aj, aimax, ailen, bs, bs2, rp, ap, bap, rmax,
   nrow, low, high, t, _i, ii, N, brow, bcol, ridx, cidx, ierr.

   Algorithm: locate the block column 'bcol' in the sorted column list of
   block row 'brow' using a short bisection followed by a linear scan.  If
   found, update the (ridx,cidx) entry of that bs x bs block (add or insert
   per 'addv').  If absent and new nonzeros are allowed (a->nonew), grow the
   row if needed, shift later entries up, zero the new block, and store the
   value.  With a->nonew == 1 new entries are silently dropped; with
   a->nonew == -1 they are an error.
*/
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
93 
/*
   MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) - inserts or adds one
   scalar into the local off-diagonal block B at point indices (row,col).

   Identical in structure to MatSetValues_SeqSBAIJ_A_Private() above but
   operating on the B matrix's arrays (b, ba, bi, bj, bimax, bilen), which
   must be declared by the expanding function (see MatSetValues_MPISBAIJ()).
*/
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
131 
/* MatSetValues_MPISBAIJ - point-wise value insertion for the MPI symmetric
   block matrix.  Only entries a(i,j) whose BLOCK indices satisfy i<=j are
   stored (upper block triangle).  Any a(i,j) with block i>j input by the user
   is ignored when ignore_ltriangular is set, and is an error otherwise.

   Locally-owned rows go straight into the sequential A (diagonal) or B
   (off-diagonal) parts via the macros above; rows owned by other processes
   are stashed for communication during assembly.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar     *ba=b->a;

  /* scratch locals used by the MatSetValues_SeqSBAIJ_{A,B}_Private macros */
  PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar     *ap,*bap;

  /* for stash */
  PetscInt      n_loc, *in_loc = PETSC_NULL;
  MatScalar     *v_loc = PETSC_NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash){
    /* grow the cached stash work arrays if this call has more columns than
       any previous one */
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs){
          if (a->ignore_ltriangular){
            continue;    /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* after first assembly B uses compacted column indices; translate
               the global column through the colmap */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* column not present in B: expand B back to global indexing */
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        /* filter out lower triangular entries before stashing the row */
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
253 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
/* MatSetValuesBlocked_MPISBAIJ - block-wise value insertion: im[]/in[] are
   BLOCK indices and v supplies m*n dense bs x bs blocks.  Only blocks with
   block-row <= block-column are accepted (symmetric upper storage); lower
   triangular blocks are skipped or raise an error depending on
   ignore_ltriangular.  Off-process rows are stashed into mat->bstash. */
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;       /* reusable bs2 scratch block */
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  /* NOTE(review): cstart/cend reuse the ROW block range (rstartbs/rendbs);
     presumably valid because SBAIJ matrices are square — confirm */
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if(!barray) {
    /* lazily allocate the scratch block used to repack strided input */
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows/columns of one block
     inside the caller's dense v array */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;   /* block is already contiguous */
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;   /* block is already contiguous */
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the strided bs x bs block into contiguous barray */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the packed block */
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            /* translate global block column to B's compacted index */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* block column not preallocated: revert B to global indexing */
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        /* stash whole blocks for communication at assembly time */
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
361 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
/* MatGetValues_MPISBAIJ - retrieve a dense m x n sub-block of values at the
   given global point indices.  Only rows owned by this process are supported;
   columns outside the diagonal block are looked up in B through the colmap,
   returning 0.0 for positions not stored locally. */
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;   /* local point row */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* diagonal block: read from A with local indices */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal: map global block column to B's compacted index */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* entry not stored locally => it is a structural zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
406 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
/* MatNorm_MPISBAIJ - computes NORM_FROBENIUS, NORM_1 or NORM_INFINITY of the
   parallel symmetric matrix.  For symmetric matrices NORM_1 == NORM_INFINITY
   (max absolute row/column sum).  The Frobenius norm combines the local A
   and B contributions, counting the off-diagonal part B twice because only
   one triangle of the matrix is stored. */
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: defer entirely to the sequential A block */
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      /* B entries appear twice in the full symmetric matrix */
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      /* rsum accumulates per-global-column sums locally; rsum2 receives the
         reduced totals across all processes */
      ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
      ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              /* each B entry contributes to both its column and (by
                 symmetry) its row */
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
486 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
487 {
488   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
489   PetscErrorCode ierr;
490   PetscInt       nstash,reallocs;
491   InsertMode     addv;
492 
493   PetscFunctionBegin;
494   if (baij->donotstash || mat->nooffprocentries) {
495     PetscFunctionReturn(0);
496   }
497 
498   /* make sure all processors are either in INSERTMODE or ADDMODE */
499   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
500   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
501   mat->insertmode = addv; /* in case this processor had no cache */
502 
503   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
504   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
505   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
506   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
507   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
508   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
/* MatAssemblyEnd_MPISBAIJ - completes assembly: drains both stashes into the
   local A/B parts, synchronizes disassembly state across processes, sets up
   the scatter contexts on first final assembly, and assembles A and B. */
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;     /* saved roworiented flags, restored below */
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash &&  !mat->nooffprocentries) {
    /* drain the point stash, one message at a time */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworinted */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworinted */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  /* row-access work arrays are stale after assembly; free them */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}
602 
603 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
604 #undef __FUNCT__
605 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
606 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
607 {
608   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
609   PetscErrorCode    ierr;
610   PetscInt          bs = mat->rmap->bs;
611   PetscMPIInt       size = baij->size,rank = baij->rank;
612   PetscBool         iascii,isdraw;
613   PetscViewer       sviewer;
614   PetscViewerFormat format;
615 
616   PetscFunctionBegin;
617   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
618   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
619   if (iascii) {
620     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
621     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
622       MatInfo info;
623       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
624       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
625       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
626              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
627       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
629       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
630       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
631       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
632       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
633       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
634       PetscFunctionReturn(0);
635     } else if (format == PETSC_VIEWER_ASCII_INFO) {
636       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
637       PetscFunctionReturn(0);
638     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
639       PetscFunctionReturn(0);
640     }
641   }
642 
643   if (isdraw) {
644     PetscDraw  draw;
645     PetscBool  isnull;
646     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
647     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
648   }
649 
650   if (size == 1) {
651     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
652     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
653   } else {
654     /* assemble the entire matrix onto first processor. */
655     Mat          A;
656     Mat_SeqSBAIJ *Aloc;
657     Mat_SeqBAIJ  *Bloc;
658     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
659     MatScalar    *a;
660 
661     /* Should this be the same type as mat? */
662     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
663     if (!rank) {
664       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
665     } else {
666       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
667     }
668     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
669     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
670     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
671 
672     /* copy over the A part */
673     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
674     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
675     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
676 
677     for (i=0; i<mbs; i++) {
678       rvals[0] = bs*(baij->rstartbs + i);
679       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
680       for (j=ai[i]; j<ai[i+1]; j++) {
681         col = (baij->cstartbs+aj[j])*bs;
682         for (k=0; k<bs; k++) {
683           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
684           col++; a += bs;
685         }
686       }
687     }
688     /* copy over the B part */
689     Bloc = (Mat_SeqBAIJ*)baij->B->data;
690     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
691     for (i=0; i<mbs; i++) {
692 
693       rvals[0] = bs*(baij->rstartbs + i);
694       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
695       for (j=ai[i]; j<ai[i+1]; j++) {
696         col = baij->garray[aj[j]]*bs;
697         for (k=0; k<bs; k++) {
698           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
699           col++; a += bs;
700         }
701       }
702     }
703     ierr = PetscFree(rvals);CHKERRQ(ierr);
704     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
705     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
706     /*
707        Everyone has to call to draw the matrix since the graphics waits are
708        synchronized across all processors that share the PetscDraw object
709     */
710     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
711     if (!rank) {
712       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
713           /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/
714       PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);
715       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
716     }
717     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
718     ierr = MatDestroy(A);CHKERRQ(ierr);
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatView_MPISBAIJ"
725 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
726 {
727   PetscErrorCode ierr;
728   PetscBool      iascii,isdraw,issocket,isbinary;
729 
730   PetscFunctionBegin;
731   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
732   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
733   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
734   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
735   if (iascii || isdraw || issocket || isbinary) {
736     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
737   } else {
738     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
739   }
740   PetscFunctionReturn(0);
741 }
742 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
/*
   Releases everything owned by an MPISBAIJ matrix: the two sequential
   blocks, the column map, the work vectors/scatters used by the mult
   kernels, and the composed query functions, then the data struct itself.
*/
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* stashes buffer off-process entries set between assembly calls */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  /* local diagonal (A) and off-diagonal (B) blocks */
  if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* communication machinery used by the mult/multadd kernels */
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  /* slvec0b/slvec1a/slvec1b are sub-vectors tied to slvec0/slvec1, so they
     are destroyed together with their parents */
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  /* MatGetRow() scratch space (allocated lazily with PetscMalloc2) */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
  if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
  if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  /* clear the type name and unregister the type-specific query functions */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
800 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
801 {
802   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
805   PetscScalar    *x,*from;
806 
807   PetscFunctionBegin;
808   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
809   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
810 
811   /* diagonal part */
812   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
813   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
814 
815   /* subdiagonal part */
816   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
817 
818   /* copy x into the vec slvec0 */
819   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
820   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
821 
822   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
823   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
824   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
825 
826   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828   /* supperdiagonal part */
829   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "MatMult_MPISBAIJ"
835 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
836 {
837   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
838   PetscErrorCode ierr;
839   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
840   PetscScalar    *x,*from;
841 
842   PetscFunctionBegin;
843   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
844   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
845 
846   /* diagonal part */
847   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
848   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
849 
850   /* subdiagonal part */
851   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
852 
853   /* copy x into the vec slvec0 */
854   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
855   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
856 
857   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
858   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
859   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
860 
861   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863   /* supperdiagonal part */
864   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
870 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
871 {
872   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
873   PetscErrorCode ierr;
874   PetscInt       nt;
875 
876   PetscFunctionBegin;
877   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
878   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
879 
880   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
881   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
882 
883   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
884   /* do diagonal part */
885   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
886   /* do supperdiagonal part */
887   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
888   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
889   /* do subdiagonal part */
890   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
891   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
892   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
893 
894   PetscFunctionReturn(0);
895 }
896 
897 #undef __FUNCT__
898 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
899 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
900 {
901   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
902   PetscErrorCode ierr;
903   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
904   PetscScalar    *x,*from,zero=0.0;
905 
906   PetscFunctionBegin;
907   /*
908   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
909   PetscSynchronizedFlush(((PetscObject)A)->comm);
910   */
911   /* diagonal part */
912   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
913   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
914 
915   /* subdiagonal part */
916   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
917 
918   /* copy x into the vec slvec0 */
919   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
920   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
921   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
922   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
923 
924   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
925   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
926   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
927 
928   /* supperdiagonal part */
929   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
930 
931   PetscFunctionReturn(0);
932 }
933 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
/*
   zz = yy + A*xx using the two-communication scheme: a forward scatter for
   the superdiagonal contribution and a reverse (adding) scatter for the
   subdiagonal contribution B^T * x. Local work is interleaved with the
   scatters to overlap computation and communication.
*/
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part (while the forward scatter is in flight) */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do supperdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
956 
957 /*
958   This only works correctly for square matrices where the subblock A->A is the
959    diagonal block
960 */
961 #undef __FUNCT__
962 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
963 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
964 {
965   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
966   PetscErrorCode ierr;
967 
968   PetscFunctionBegin;
969   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
970   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
971   PetscFunctionReturn(0);
972 }
973 
974 #undef __FUNCT__
975 #define __FUNCT__ "MatScale_MPISBAIJ"
976 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
977 {
978   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
979   PetscErrorCode ierr;
980 
981   PetscFunctionBegin;
982   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
983   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
986 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
/*
   Returns one locally-owned global row by merging the corresponding row of
   the diagonal block (A, SeqSBAIJ) and the off-diagonal block (B, SeqBAIJ),
   translating B's local block columns to global indices via garray.
   The merged values/indices are written into per-matrix scratch arrays
   (rowvalues/rowindices) that must be released with MatRestoreRow().
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  /* only one row may be "gotten" (and not yet restored) at a time */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  /* request columns/values from the sequential blocks only if the caller
     asked for them (NULL pointers suppress the corresponding output) */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;  /* local block column of B -> global block column */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column precedes the diagonal block go first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;  /* split point: number of B entries left of the diagonal block */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found above while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was NULL, so find the split point here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      /* empty row */
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* release the per-block rows obtained above */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1071 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1072 {
1073   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1074 
1075   PetscFunctionBegin;
1076   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1077   baij->getrowactive = PETSC_FALSE;
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1083 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1084 {
1085   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1086   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1087 
1088   PetscFunctionBegin;
1089   aA->getrow_utriangular = PETSC_TRUE;
1090   PetscFunctionReturn(0);
1091 }
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1094 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1095 {
1096   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1097   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1098 
1099   PetscFunctionBegin;
1100   aA->getrow_utriangular = PETSC_FALSE;
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1106 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1107 {
1108   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1113   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1119 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1120 {
1121   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1122   PetscErrorCode ierr;
1123 
1124   PetscFunctionBegin;
1125   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1126   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1132 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1133 {
1134   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1139   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1145 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1146 {
1147   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1148   Mat            A = a->A,B = a->B;
1149   PetscErrorCode ierr;
1150   PetscReal      isend[5],irecv[5];
1151 
1152   PetscFunctionBegin;
1153   info->block_size     = (PetscReal)matin->rmap->bs;
1154   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1155   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1156   isend[3] = info->memory;  isend[4] = info->mallocs;
1157   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1158   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1159   isend[3] += info->memory;  isend[4] += info->mallocs;
1160   if (flag == MAT_LOCAL) {
1161     info->nz_used      = isend[0];
1162     info->nz_allocated = isend[1];
1163     info->nz_unneeded  = isend[2];
1164     info->memory       = isend[3];
1165     info->mallocs      = isend[4];
1166   } else if (flag == MAT_GLOBAL_MAX) {
1167     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1168     info->nz_used      = irecv[0];
1169     info->nz_allocated = irecv[1];
1170     info->nz_unneeded  = irecv[2];
1171     info->memory       = irecv[3];
1172     info->mallocs      = irecv[4];
1173   } else if (flag == MAT_GLOBAL_SUM) {
1174     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1175     info->nz_used      = irecv[0];
1176     info->nz_allocated = irecv[1];
1177     info->nz_unneeded  = irecv[2];
1178     info->memory       = irecv[3];
1179     info->mallocs      = irecv[4];
1180   } else {
1181     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1182   }
1183   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1184   info->fill_ratio_needed = 0;
1185   info->factor_mallocs    = 0;
1186   PetscFunctionReturn(0);
1187 }
1188 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
/*
   Applies a MatOption to the parallel SBAIJ matrix. Most options are
   forwarded to the sequential diagonal block (a->A) and, where it also
   applies, the off-diagonal block (a->B); a few only set local flags.
*/
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool  flg)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* nonzero-pattern options apply to both local blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* recorded locally and forwarded to both blocks */
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* suppresses stashing of off-process entries during MatSetValues() */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    /* switch the mult kernel to the conjugate-transpose variant */
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    /* SPD implies both numerical and structural symmetry */
    A->spd_set                         = PETSC_TRUE;
    A->spd                             = flg;
    if (flg) {
      A->symmetric                     = PETSC_TRUE;
      A->structurally_symmetric        = PETSC_TRUE;
      A->symmetric_set                 = PETSC_TRUE;
      A->structurally_symmetric_set    = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    /* SBAIJ storage is symmetric by construction, so it cannot be unset */
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): sets the same flag as MAT_IGNORE_LOWER_TRIANGULAR;
       confirm it should not set a separate error flag instead */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1263 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1264 {
1265   PetscErrorCode ierr;
1266   PetscFunctionBegin;
1267   if (MAT_INITIAL_MATRIX || *B != A) {
1268     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1269   }
1270   PetscFunctionReturn(0);
1271 }
1272 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
/*
   Symmetric diagonal scaling A <- diag(ll) A diag(rr). To preserve symmetry
   the left and right scaling vectors must be the same vector (checked with
   VecEqual when the handles differ). The scatter of rr to ghost entries is
   started first so the local scaling overlaps communication.
*/
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr){
    /* different handles are allowed only if the contents are equal */
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  /* start fetching ghost values of rr needed for the off-diagonal block */
  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1312 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1313 {
1314   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1315   PetscErrorCode ierr;
1316 
1317   PetscFunctionBegin;
1318   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatEqual_MPISBAIJ"
1326 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1327 {
1328   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1329   Mat            a,b,c,d;
1330   PetscBool      flg;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   a = matA->A; b = matA->B;
1335   c = matB->A; d = matB->B;
1336 
1337   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1338   if (flg) {
1339     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1340   }
1341   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatCopy_MPISBAIJ"
1347 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1348 {
1349   PetscErrorCode ierr;
1350   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1351   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1352 
1353   PetscFunctionBegin;
1354   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1355   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1356     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1357     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1358     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1359   } else {
1360     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1361     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1362   }
1363   PetscFunctionReturn(0);
1364 }
1365 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
/*
   Default preallocation when the user supplied none: forwards to
   MatMPISBAIJSetPreallocation() with default nonzero estimates. The
   negative block size signals that the block size may still be changed
   via the options database (see MatMPISBAIJSetPreallocation_MPISBAIJ).
*/
PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1379 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1380 {
1381   PetscErrorCode ierr;
1382   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1383   PetscBLASInt   bnz,one=1;
1384   Mat_SeqSBAIJ   *xa,*ya;
1385   Mat_SeqBAIJ    *xb,*yb;
1386 
1387   PetscFunctionBegin;
1388   if (str == SAME_NONZERO_PATTERN) {
1389     PetscScalar alpha = a;
1390     xa = (Mat_SeqSBAIJ *)xx->A->data;
1391     ya = (Mat_SeqSBAIJ *)yy->A->data;
1392     bnz = PetscBLASIntCast(xa->nz);
1393     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1394     xb = (Mat_SeqBAIJ *)xx->B->data;
1395     yb = (Mat_SeqBAIJ *)yy->B->data;
1396     bnz = PetscBLASIntCast(xb->nz);
1397     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1398   } else {
1399     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1400     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1401     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1408 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1409 {
1410   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1411   PetscInt        rbs,cbs;
1412   PetscErrorCode  ierr;
1413 
1414   PetscFunctionBegin;
1415   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1416   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1417   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1418   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1419   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs);
1420   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1426 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1427 {
1428   PetscErrorCode ierr;
1429   PetscInt       i;
1430   PetscBool      flg;
1431 
1432   PetscFunctionBegin;
1433   for (i=0; i<n; i++) {
1434     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1435     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1436   }
1437   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 
1442 /* -------------------------------------------------------------------*/
1443 static struct _MatOps MatOps_Values = {
1444        MatSetValues_MPISBAIJ,
1445        MatGetRow_MPISBAIJ,
1446        MatRestoreRow_MPISBAIJ,
1447        MatMult_MPISBAIJ,
1448 /* 4*/ MatMultAdd_MPISBAIJ,
1449        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1450        MatMultAdd_MPISBAIJ,
1451        0,
1452        0,
1453        0,
1454 /*10*/ 0,
1455        0,
1456        0,
1457        MatSOR_MPISBAIJ,
1458        MatTranspose_MPISBAIJ,
1459 /*15*/ MatGetInfo_MPISBAIJ,
1460        MatEqual_MPISBAIJ,
1461        MatGetDiagonal_MPISBAIJ,
1462        MatDiagonalScale_MPISBAIJ,
1463        MatNorm_MPISBAIJ,
1464 /*20*/ MatAssemblyBegin_MPISBAIJ,
1465        MatAssemblyEnd_MPISBAIJ,
1466        MatSetOption_MPISBAIJ,
1467        MatZeroEntries_MPISBAIJ,
1468 /*24*/ 0,
1469        0,
1470        0,
1471        0,
1472        0,
1473 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1474        0,
1475        0,
1476        0,
1477        0,
1478 /*34*/ MatDuplicate_MPISBAIJ,
1479        0,
1480        0,
1481        0,
1482        0,
1483 /*39*/ MatAXPY_MPISBAIJ,
1484        MatGetSubMatrices_MPISBAIJ,
1485        MatIncreaseOverlap_MPISBAIJ,
1486        MatGetValues_MPISBAIJ,
1487        MatCopy_MPISBAIJ,
1488 /*44*/ 0,
1489        MatScale_MPISBAIJ,
1490        0,
1491        0,
1492        0,
1493 /*49*/ MatSetBlockSize_MPISBAIJ,
1494        0,
1495        0,
1496        0,
1497        0,
1498 /*54*/ 0,
1499        0,
1500        MatSetUnfactored_MPISBAIJ,
1501        0,
1502        MatSetValuesBlocked_MPISBAIJ,
1503 /*59*/ 0,
1504        0,
1505        0,
1506        0,
1507        0,
1508 /*64*/ 0,
1509        0,
1510        0,
1511        0,
1512        0,
1513 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1514        0,
1515        0,
1516        0,
1517        0,
1518 /*74*/ 0,
1519        0,
1520        0,
1521        0,
1522        0,
1523 /*79*/ 0,
1524        0,
1525        0,
1526        0,
1527        MatLoad_MPISBAIJ,
1528 /*84*/ 0,
1529        0,
1530        0,
1531        0,
1532        0,
1533 /*89*/ 0,
1534        0,
1535        0,
1536        0,
1537        0,
1538 /*94*/ 0,
1539        0,
1540        0,
1541        0,
1542        0,
1543 /*99*/ 0,
1544        0,
1545        0,
1546        0,
1547        0,
1548 /*104*/0,
1549        MatRealPart_MPISBAIJ,
1550        MatImaginaryPart_MPISBAIJ,
1551        MatGetRowUpperTriangular_MPISBAIJ,
1552        MatRestoreRowUpperTriangular_MPISBAIJ,
1553 /*109*/0,
1554        0,
1555        0,
1556        0,
1557        0,
1558 /*114*/0,
1559        0,
1560        0,
1561        0,
1562        0,
1563 /*119*/0,
1564        0,
1565        0,
1566        0
1567 };
1568 
1569 
1570 EXTERN_C_BEGIN
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1573 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *a)
1574 {
1575   PetscFunctionBegin;
1576   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1577   *iscopy = PETSC_FALSE;
1578   PetscFunctionReturn(0);
1579 }
1580 EXTERN_C_END
1581 
1582 EXTERN_C_BEGIN
1583 #undef __FUNCT__
1584 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1585 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1586 {
1587   Mat_MPISBAIJ   *b;
1588   PetscErrorCode ierr;
1589   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1590 
1591   PetscFunctionBegin;
1592   if (bs < 0){
1593     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1594       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1595     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1596     bs   = PetscAbs(bs);
1597   }
1598   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1599   bs = newbs;
1600 
1601   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1602   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1603   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1604   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1605 
1606   B->rmap->bs = B->cmap->bs = bs;
1607   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1608   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1609 
1610   if (d_nnz) {
1611     for (i=0; i<B->rmap->n/bs; i++) {
1612       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1613     }
1614   }
1615   if (o_nnz) {
1616     for (i=0; i<B->rmap->n/bs; i++) {
1617       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1618     }
1619   }
1620 
1621   b   = (Mat_MPISBAIJ*)B->data;
1622   mbs = B->rmap->n/bs;
1623   Mbs = B->rmap->N/bs;
1624   if (mbs*bs != B->rmap->n) {
1625     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1626   }
1627 
1628   B->rmap->bs  = bs;
1629   b->bs2 = bs*bs;
1630   b->mbs = mbs;
1631   b->nbs = mbs;
1632   b->Mbs = Mbs;
1633   b->Nbs = Mbs;
1634 
1635   for (i=0; i<=b->size; i++) {
1636     b->rangebs[i] = B->rmap->range[i]/bs;
1637   }
1638   b->rstartbs = B->rmap->rstart/bs;
1639   b->rendbs   = B->rmap->rend/bs;
1640 
1641   b->cstartbs = B->cmap->rstart/bs;
1642   b->cendbs   = B->cmap->rend/bs;
1643 
1644   if (!B->preallocated) {
1645     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1646     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1647     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1648     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1649     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1650     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1651     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1652     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1653     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1654   }
1655 
1656   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1657   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1658   B->preallocated = PETSC_TRUE;
1659   PetscFunctionReturn(0);
1660 }
1661 EXTERN_C_END
1662 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
/*
  Preallocate and fill an MPISBAIJ matrix from block-CSR data.

  bs  - block size (must be >= 1)
  ii  - block-row pointer array for the local block rows; ii[0] must be 0
  jj  - global block-column indices, ii[m] entries total
  V   - optional values, bs*bs scalars per block; pass PETSC_NULL to set
        only the nonzero pattern (explicit zero blocks are inserted)

  Counts how many blocks of each row fall in the diagonal portion
  [cstart,cend) versus the off-diagonal portion, preallocates accordingly,
  inserts all values row by row, and assembles the matrix.

  NOTE(review): the diagonal/off-diagonal split below assumes the column
  indices within each row are sorted in ascending order -- columns out of
  order would be miscounted; confirm callers guarantee this.
*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ=0;
  PetscScalar    *values=0;
  PetscErrorCode ierr;

  PetscFunctionBegin;

  if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* all of the sizes below are in block (not point) units */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ  = jj + ii[i];
    /* skip columns strictly left of the diagonal portion (column < cstart) */
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    /* count columns inside [cstart,cend): these land in the diagonal block */
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;   /* everything outside [cstart,cend) is off-diagonal */
  }
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert explicit zeros so the pattern is still established */
    ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    /* when V is absent every row reuses the same zero buffer (offset 0) */
    const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
1728 
1729 EXTERN_C_BEGIN
1730 #if defined(PETSC_HAVE_MUMPS)
1731 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1732 #endif
1733 #if defined(PETSC_HAVE_SPOOLES)
1734 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1735 #endif
1736 #if defined(PETSC_HAVE_PASTIX)
1737 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1738 #endif
1739 EXTERN_C_END
1740 
1741 /*MC
1742    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1743    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1744 
1745    Options Database Keys:
1746 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1747 
1748   Level: beginner
1749 
1750 .seealso: MatCreateMPISBAIJ
1751 M*/
1752 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
/*
  Constructor for the MATMPISBAIJ matrix type.

  Allocates and zero-fills the Mat_MPISBAIJ data structure, installs the
  function table (MatOps_Values plus type-specific destroy/view), creates
  the point stash used for off-process entries, processes the hash-table
  option, and composes the dynamically-dispatched functions
  (MatGetFactor_*, MatStoreValues, preallocation routines, ...).
  The matrix is marked symmetric/structurally symmetric, as SBAIJ stores
  only the upper triangular portion.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;

  ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  /* install the shared MPISBAIJ operation table, then override per-type entries */
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy    = MatDestroy_MPISBAIJ;
  B->ops->view       = MatView_MPISBAIJ;
  B->mapping         = 0;
  B->assembled       = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;
  b->slvec0       = 0;
  b->slvec0b      = 0;
  b->slvec1       = 0;
  b->slvec1a      = 0;
  b->slvec1b      = 0;
  b->sMvctx       = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  b->in_loc       = 0;
  b->v_loc        = 0;
  b->n_loc        = 0;
  ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) {
      PetscReal fact = 1.39;
      ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      /* NOTE(review): the same option name is queried again here, this time as a
         real-valued hash-table growth factor -- confirm this dual use is intended */
      ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
      if (fact <= 1.0) fact = 1.39;   /* factor must exceed 1; fall back to the default */
      ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
      ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
    }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  /* register external-package factorizations available at configure time */
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
					   "MatGetFactor_mpisbaij_pastix",
					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                     "MatGetFactor_sbaij_mumps",
                                     MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                     "MatGetFactor_mpisbaij_spooles",
                                     MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
#endif
  /* compose type-specific implementations looked up by name elsewhere in PETSc */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPISBAIJ",
                                     MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPISBAIJ",
                                     MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPISBAIJ",
                                     MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
                                     "MatMPISBAIJSetPreallocation_MPISBAIJ",
                                     MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
                                     "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
                                     MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
  /* SBAIJ matrices are symmetric by construction (only upper triangle stored) */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1867 
1868 /*MC
1869    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1870 
1871    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1872    and MATMPISBAIJ otherwise.
1873 
1874    Options Database Keys:
1875 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1876 
1877   Level: beginner
1878 
1879 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1880 M*/
1881 
1882 EXTERN_C_BEGIN
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatCreate_SBAIJ"
1885 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1886 {
1887   PetscErrorCode ierr;
1888   PetscMPIInt    size;
1889 
1890   PetscFunctionBegin;
1891   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1892   if (size == 1) {
1893     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1894   } else {
1895     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1896   }
1897   PetscFunctionReturn(0);
1898 }
1899 EXTERN_C_END
1900 
1901 #undef __FUNCT__
1902 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1903 /*@C
1904    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1905    the user should preallocate the matrix storage by setting the parameters
1906    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1907    performance can be increased by more than a factor of 50.
1908 
1909    Collective on Mat
1910 
1911    Input Parameters:
1912 +  A - the matrix
.  bs   - size of block
1914 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1915            submatrix  (same for all local rows)
1916 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1918            (possibly different for each block row) or PETSC_NULL.  You must leave room
1919            for the diagonal entry even if it is zero.
1920 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1921            submatrix (same for all local rows).
1922 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1923            off-diagonal portion of the local submatrix (possibly different for
1924            each block row) or PETSC_NULL.
1925 
1926 
1927    Options Database Keys:
1928 .   -mat_no_unroll - uses code that does not unroll the loops in the
1929                      block calculations (much slower)
1930 .   -mat_block_size - size of the blocks to use
1931 
1932    Notes:
1933 
1934    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1936 
1937    If the *_nnz parameter is given then the *_nz parameter is ignored
1938 
1939    Storage Information:
1940    For a square global matrix we define each processor's diagonal portion
1941    to be its local rows and the corresponding columns (a square submatrix);
1942    each processor's off-diagonal portion encompasses the remainder of the
1943    local matrix (a rectangular submatrix).
1944 
1945    The user can specify preallocated storage for the diagonal part of
1946    the local submatrix with either d_nz or d_nnz (not both).  Set
1947    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1948    memory allocation.  Likewise, specify preallocated storage for the
1949    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1950 
1951    You can call MatGetInfo() to get information on how effective the preallocation was;
1952    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1953    You can also run with the option -info and look for messages with the string
1954    malloc in them to see if additional memory allocation was needed.
1955 
1956    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1957    the figure below we depict these three local rows and all columns (0-11).
1958 
1959 .vb
1960            0 1 2 3 4 5 6 7 8 9 10 11
1961           -------------------
1962    row 3  |  o o o d d d o o o o o o
1963    row 4  |  o o o d d d o o o o o o
1964    row 5  |  o o o d d d o o o o o o
1965           -------------------
1966 .ve
1967 
1968    Thus, any entries in the d locations are stored in the d (diagonal)
1969    submatrix, and any entries in the o locations are stored in the
1970    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1971    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1972 
1973    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1974    plus the diagonal part of the d matrix,
1975    and o_nz should indicate the number of block nonzeros per row in the upper triangular
1976    part of the o matrix.
1977    In general, for PDE problems in which most nonzeros are near the diagonal,
1978    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1979    or you will get TERRIBLE performance; see the users' manual chapter on
1980    matrices.
1981 
1982    Level: intermediate
1983 
1984 .keywords: matrix, block, aij, compressed row, sparse, parallel
1985 
1986 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1987 @*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation (for MPISBAIJ matrices,
     MatMPISBAIJSetPreallocation_MPISBAIJ) if one was composed under the name
     "MatMPISBAIJSetPreallocation_C"; PetscTryMethod is a no-op otherwise. */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatCreateMPISBAIJ"
1999 /*@C
2000    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2001    (block compressed row).  For good matrix assembly performance
2002    the user should preallocate the matrix storage by setting the parameters
2003    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2004    performance can be increased by more than a factor of 50.
2005 
2006    Collective on MPI_Comm
2007 
2008    Input Parameters:
2009 +  comm - MPI communicator
.  bs   - size of block
2011 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2012            This value should be the same as the local size used in creating the
2013            y vector for the matrix-vector product y = Ax.
2014 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2015            This value should be the same as the local size used in creating the
2016            x vector for the matrix-vector product y = Ax.
2017 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2018 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2019 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2020            submatrix  (same for all local rows)
2021 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local submatrix
           (possibly different for each block row) or PETSC_NULL.
2024            You must leave room for the diagonal entry even if it is zero.
2025 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2026            submatrix (same for all local rows).
2027 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2028            off-diagonal portion of the local submatrix (possibly different for
2029            each block row) or PETSC_NULL.
2030 
2031    Output Parameter:
2032 .  A - the matrix
2033 
2034    Options Database Keys:
2035 .   -mat_no_unroll - uses code that does not unroll the loops in the
2036                      block calculations (much slower)
2037 .   -mat_block_size - size of the blocks to use
2038 .   -mat_mpi - use the parallel matrix data structures even on one processor
2039                (defaults to using SeqBAIJ format on one processor)
2040 
2041    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2043    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2044 
2045    Notes:
2046    The number of rows and columns must be divisible by blocksize.
2047    This matrix type does not support complex Hermitian operation.
2048 
2049    The user MUST specify either the local or global matrix dimensions
2050    (possibly both).
2051 
2052    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   than it must be used on all processors that share the object for that argument. (Here "than" should read "then".)
2054 
2055    If the *_nnz parameter is given then the *_nz parameter is ignored
2056 
2057    Storage Information:
2058    For a square global matrix we define each processor's diagonal portion
2059    to be its local rows and the corresponding columns (a square submatrix);
2060    each processor's off-diagonal portion encompasses the remainder of the
2061    local matrix (a rectangular submatrix).
2062 
2063    The user can specify preallocated storage for the diagonal part of
2064    the local submatrix with either d_nz or d_nnz (not both).  Set
2065    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2066    memory allocation.  Likewise, specify preallocated storage for the
2067    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2068 
2069    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2070    the figure below we depict these three local rows and all columns (0-11).
2071 
2072 .vb
2073            0 1 2 3 4 5 6 7 8 9 10 11
2074           -------------------
2075    row 3  |  o o o d d d o o o o o o
2076    row 4  |  o o o d d d o o o o o o
2077    row 5  |  o o o d d d o o o o o o
2078           -------------------
2079 .ve
2080 
2081    Thus, any entries in the d locations are stored in the d (diagonal)
2082    submatrix, and any entries in the o locations are stored in the
2083    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2084    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2085 
2086    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2087    plus the diagonal part of the d matrix,
2088    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2089    In general, for PDE problems in which most nonzeros are near the diagonal,
2090    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2091    or you will get TERRIBLE performance; see the users' manual chapter on
2092    matrices.
2093 
2094    Level: intermediate
2095 
2096 .keywords: matrix, block, aij, compressed row, sparse, parallel
2097 
2098 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2099 @*/
2100 
2101 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2102 {
2103   PetscErrorCode ierr;
2104   PetscMPIInt    size;
2105 
2106   PetscFunctionBegin;
2107   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2108   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2109   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2110   if (size > 1) {
2111     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2112     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2113   } else {
2114     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2115     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2116   }
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPISBAIJ"
/*
  Duplicates an MPISBAIJ matrix: creates a new matrix with the same layout,
  deep-copies the bookkeeping (block sizes, ownership ranges, colmap, garray),
  duplicates the communication vectors (lvec, slvec0/1 and their sub-vector
  views), and duplicates the local A (diagonal, SeqSBAIJ) and B
  (off-diagonal, SeqBAIJ) pieces per cpvalues.  The hash table and
  MatGetRow() caches are NOT copied; they are rebuilt lazily on demand.
*/
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the block-level bookkeeping */
  a = (Mat_MPISBAIJ*)mat->data;
  a->bs2   = oldmat->bs2;
  a->mbs   = oldmat->mbs;
  a->nbs   = oldmat->nbs;
  a->Mbs   = oldmat->Mbs;
  a->Nbs   = oldmat->Nbs;


  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() caches are not duplicated; rebuilt on first use */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs    = oldmat->rstartbs;
  a->rendbs      = oldmat->rendbs;
  a->cstartbs    = oldmat->cstartbs;
  a->cendbs      = oldmat->cendbs;

  /* hash table stuff: only the flag/factor are inherited, the table itself is not copied */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  if (oldmat->colmap) {
    /* colmap: global block column -> local column in B (table or dense array) */
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* garray: global block-column indices of the off-diagonal part B */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are views that alias the storage of
     slvec1/slvec0: the first bs*mbs entries are the local part, the
     remaining nt-bs*mbs entries the ghost part */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);

  /* deep-copy the local diagonal (A) and off-diagonal (B) pieces */
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2230 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
/*
   MatLoad_MPISBAIJ - Loads a symmetric block AIJ matrix from a PETSc binary viewer.

   Process 0 reads the header, row lengths, column indices and values from the
   file and ships each process its share with MPI_Send/MPI_Scatterv; the other
   processes only receive.  If the global row count is not divisible by the
   block size, dummy rows/columns (with a unit diagonal entry) are appended so
   that it is.  Collective on the viewer's communicator.
*/
PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs=1,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
  int            fd;

  PetscFunctionBegin;
  /* Allow the user to override the default block size of 1 from the options database */
  ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only process 0 touches the file; header = [classid, M, N, total nz] */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
  }

  /* sizesset==0 means the caller left all local/global sizes as PETSC_DECIDE */
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);   /* number of padding rows; equals bs when M is already divisible */
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs        = Mbs/size + ((Mbs % size) > rank);   /* distribute the remainder to the first ranks */
    m          = mbs*bs;
  } else { /* User Set */
    m          = newmat->rmap->n;
    mbs        = m/bs;
  }
  /* rowners[]/browners[]: prefix sums of block-row / point-row ownership ranges */
  ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
  mmbs       = PetscMPIIntCast(mbs);
  ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];      /* first owned block row (global block index) */
  rend   = rowners[rank+1];    /* one past the last owned block row */

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    /* each padding row gets exactly one (diagonal) entry */
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    /* non-root ranks: send buffer/count/displacement arguments are ignored by MPI_Scatterv */
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* uniprocessor: the padding entries are not in the file, so read fewer and fill them in */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      /* append diagonal column indices for the padding rows, which live on the last process */
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    /* sanity check: message length must match the row lengths we were scattered */
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0;   /* running point-row index within the local rows */
  nzcount  = 0;   /* running index into mycols */
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        /* mask[] ensures each block column is counted at most once per block row */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
    if (!sizesset) {
    /* sizes were fully PETSC_DECIDE: adopt the (possibly padded) file dimensions */
    ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  }
  /* the file stores the full matrix; SBAIJ keeps only the upper triangle, so drop lower entries */
  ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    /* unit diagonal on the padding rows keeps the padded matrix nonsingular */
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1){
      /* note: i == size-1 here, left over from the loop above */
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);

  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2481 
2482 #undef __FUNCT__
2483 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2484 /*XXXXX@
2485    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2486 
2487    Input Parameters:
2488 .  mat  - the matrix
2489 .  fact - factor
2490 
2491    Not Collective on Mat, each process can have a different hash factor
2492 
2493    Level: advanced
2494 
2495   Notes:
2496    This can also be set by the command line option: -mat_use_hash_table fact
2497 
2498 .keywords: matrix, hashtable, factor, HT
2499 
2500 .seealso: MatSetOption()
2501 @XXXXX*/
2502 
2503 
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
/*
   MatGetRowMaxAbs_MPISBAIJ - Computes, for each local row, the maximum
   absolute value over the whole (symmetric) row.

   Only the upper triangle is stored (A = local diagonal block, B = upper
   off-diagonal part), so each entry of B contributes both to its row max on
   this process and to the row max of its (transposed) column, which lives on
   a higher-ranked process.  Those column contributions are accumulated in
   work[] and sent downstream with point-to-point messages.

   The idx[] output (column index of the max) is not implemented.
*/
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* start from the row maxima of the local diagonal block */
  ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);

  bs   = A->rmap->bs;
  mbs  = a->mbs;    /* number of local block rows */
  Mbs  = a->Mbs;    /* number of global block rows */
  ba   = b->a;      /* values of B, stored block by block */
  bi   = b->i;      /* block-row pointers of B */
  bj   = b->j;      /* block-column indices of B */

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1){  /* the last process owns no upper off-diagonal part */
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++){
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++){
          col = bcol + kcol;                 /* local col index */
          col += rowners_bs[rank+1];      /* global col index */
          /* NOTE(review): B's columns are offset by this rank's upper ownership
             boundary here; assumes b->j is numbered relative to that boundary
             rather than via a garray mapping -- confirm against assembly code */
          for (krow=0; krow<bs; krow++){
            atmp = PetscAbsScalar(*ba); ba++;
            row = brow + krow;    /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;   /* contribution to the symmetric (column) row */
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest=rank+1; dest<size; dest++){
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank){  /* every rank but 0 receives one message from each lower rank */
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++){
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++){
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2588 
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ"
/*
   MatSOR_MPISBAIJ - SOR/SSOR relaxation for MPISBAIJ matrices (bs == 1 only).

   Because only the upper triangle is stored, each sweep reconstructs the
   symmetric off-process contribution via B^T (scattered through sMvctx into
   the split vectors slvec0/slvec1) before running the local sweep on the
   diagonal block A.  Supports local symmetric sweeps, single local
   forward/backward sweeps with a zero initial guess, and Eisenstat's trick.
*/
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    /* purely local operation: delegate to the diagonal block */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if ( flag & SOR_ZERO_INITIAL_GUESS ) {
      /* with a zero guess the first sweep needs no off-process correction */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--){

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      /* negate both halves: slvec0 = -[xx ; B^T xx] */
      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      /* scatter-add: slvec1 accumulates bb - (lower-triangular contributions) */
      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    /* Eisenstat's trick: apply the preconditioned operator using one backward
       and one forward local sweep plus a diagonal correction */
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    if (!mat->xx1) {
      /* lazily created work vectors, cached on the matrix */
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        /* scale == -1 when omega == 1, so the loop simplifies to b - diag*x */
        for (i=0; i<n; i++) {
          sl[i] = b[i] - diag[i]*x[i];
        }
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) {
          sl[i] = b[i] + scale*diag[i]*x[i];
        }
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}
2728 
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
/*
   MatSOR_MPISBAIJ_2comm - Alternative SSOR implementation (bs == 1 only) that
   uses the conventional two-scatter scheme (Mvctx/lvec) instead of the split
   slvec0/slvec1 vectors: a forward scatter of xx into lvec and a reverse
   scatter-add of the B^T contribution back into the right-hand side.
   Only local symmetric sweeps are supported.
*/
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if ( flag & SOR_ZERO_INITIAL_GUESS ) {
      /* with a zero guess the first sweep needs no off-process correction */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--){
      /* begin gathering ghost values of xx; overlapped with the B^T product below */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      /* reverse scatter-add ships -B^T*xx back to the owning processes */
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(bb1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}
2776 
2777 #undef __FUNCT__
2778 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2779 /*@
2780      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2781          CSR format the local rows.
2782 
2783    Collective on MPI_Comm
2784 
2785    Input Parameters:
2786 +  comm - MPI communicator
2787 .  bs - the block size, only a block size of 1 is supported
2788 .  m - number of local rows (Cannot be PETSC_DECIDE)
2789 .  n - This value should be the same as the local size used in creating the
2790        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2791        calculated if N is given) For square matrices n is almost always m.
2792 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2793 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2794 .   i - row indices
2795 .   j - column indices
2796 -   a - matrix values
2797 
2798    Output Parameter:
2799 .   mat - the matrix
2800 
2801    Level: intermediate
2802 
2803    Notes:
2804        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2805      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2806      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2807 
2808        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2809 
2810 .keywords: matrix, aij, compressed row, sparse, parallel
2811 
2812 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2813           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
2814 @*/
2815 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2816 {
2817   PetscErrorCode ierr;
2818 
2819 
2820  PetscFunctionBegin;
2821   if (i[0]) {
2822     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2823   }
2824   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2825   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2826   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2827   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2828   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2829   PetscFunctionReturn(0);
2830 }
2831 
2832 
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (the symmetric block variant of the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatMPISBAIJSetPreallocation(), MatCreateMPISBAIJ(), MPISBAIJ
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation, if one is registered on B */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2862 
2863 
2864